fpu/softfloat-macros.h

   1 /*
   2  * QEMU float support macros
   3  *
   4  * Derived from SoftFloat.
   5  */
   6
   7 /*============================================================================
   8
   9 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
  10 Arithmetic Package, Release 2b.
  11
  12 Written by John R. Hauser.  This work was made possible in part by the
  13 International Computer Science Institute, located at Suite 600, 1947 Center
  14 Street, Berkeley, California 94704.  Funding was partially provided by the
  15 National Science Foundation under grant MIP-9311980.  The original version
  16 of this code was written as part of a project to build a fixed-point vector
  17 processor in collaboration with the University of California at Berkeley,
  18 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  19 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
  20 arithmetic/SoftFloat.html'.
  21
  22 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
  23 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
  24 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
  25 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  26 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  27 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  28 INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
  29 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  30
  31 Derivative works are acceptable, even for commercial purposes, so long as
  32 (1) the source code for the derivative work includes prominent notice that
  33 the work is derivative, and (2) the source code includes prominent notice with
  34 these four paragraphs for those parts of this code that are retained.
  35
  36 =============================================================================*/
  37
  38 /*----------------------------------------------------------------------------
  39 | This macro tests for minimum version of the GNU C compiler.
  40 *----------------------------------------------------------------------------*/
  41 #if defined(__GNUC__) && defined(__GNUC_MINOR__)
  42 # define SOFTFLOAT_GNUC_PREREQ(maj, min) \
  43          ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
  44 #else
  45 # define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
  46 #endif
  47
  48
  49 /*----------------------------------------------------------------------------
  50 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
  51 | bits are shifted off, they are ``jammed'' into the least significant bit of
  52 | the result by setting the least significant bit to 1.  The value of `count'
  53 | can be arbitrarily large; in particular, if `count' is greater than 32, the
  54 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  55 | The result is stored in the location pointed to by `zPtr'.
  56 *----------------------------------------------------------------------------*/
  57
  58 INLINE void shift32RightJamming(uint32_t a, int_fast16_t count, uint32_t *zPtr)
  59 {
  60     uint32_t z;
  61
  62     if ( count == 0 ) {
  63         z = a;
  64     }
  65     else if ( count < 32 ) {
  66         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
  67     }
  68     else {
  69         z = ( a != 0 );
  70     }
  71     *zPtr = z;
  72
  73 }
  74
  75 /*----------------------------------------------------------------------------
  76 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
  77 | bits are shifted off, they are ``jammed'' into the least significant bit of
  78 | the result by setting the least significant bit to 1.  The value of `count'
  79 | can be arbitrarily large; in particular, if `count' is greater than 64, the
  80 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  81 | The result is stored in the location pointed to by `zPtr'.
  82 *----------------------------------------------------------------------------*/
  83
  84 INLINE void shift64RightJamming(uint64_t a, int_fast16_t count, uint64_t *zPtr)
  85 {
  86     uint64_t z;
  87
  88     if ( count == 0 ) {
  89         z = a;
  90     }
  91     else if ( count < 64 ) {
  92         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
  93     }
  94     else {
  95         z = ( a != 0 );
  96     }
  97     *zPtr = z;
  98
  99 }
 100
 101 /*----------------------------------------------------------------------------
 102 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
 103 | _plus_ the number of bits given in `count'.  The shifted result is at most
 104 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
 105 | bits shifted off form a second 64-bit result as follows:  The _last_ bit
 106 | shifted off is the most-significant bit of the extra result, and the other
 107 | 63 bits of the extra result are all zero if and only if _all_but_the_last_
 108 | bits shifted off were all zero.  This extra result is stored in the location
 109 | pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
 110 |     (This routine makes more sense if `a0' and `a1' are considered to form
 111 | a fixed-point value with binary point between `a0' and `a1'.  This fixed-
 112 | point value is shifted right by the number of bits given in `count', and
 113 | the integer part of the result is returned at the location pointed to by
 114 | `z0Ptr'.  The fractional part of the result may be slightly corrupted as
 115 | described above, and is returned at the location pointed to by `z1Ptr'.)
 116 *----------------------------------------------------------------------------*/
 117
 118 INLINE void
 119  shift64ExtraRightJamming(
 120      uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 121 {
 122     uint64_t z0, z1;
 123     int8 negCount = ( - count ) & 63;
 124
 125     if ( count == 0 ) {
 126         z1 = a1;
 127         z0 = a0;
 128     }
 129     else if ( count < 64 ) {
 130         z1 = ( a0<<negCount ) | ( a1 != 0 );
 131         z0 = a0>>count;
 132     }
 133     else {
 134         if ( count == 64 ) {
 135             z1 = a0 | ( a1 != 0 );
 136         }
 137         else {
 138             z1 = ( ( a0 | a1 ) != 0 );
 139         }
 140         z0 = 0;
 141     }
 142     *z1Ptr = z1;
 143     *z0Ptr = z0;
 144
 145 }
 146
 147 /*----------------------------------------------------------------------------
 148 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 149 | number of bits given in `count'.  Any bits shifted off are lost.  The value
 150 | of `count' can be arbitrarily large; in particular, if `count' is greater
 151 | than 128, the result will be 0.  The result is broken into two 64-bit pieces
 152 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 153 *----------------------------------------------------------------------------*/
 154
 155 INLINE void
 156  shift128Right(
 157      uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 158 {
 159     uint64_t z0, z1;
 160     int8 negCount = ( - count ) & 63;
 161
 162     if ( count == 0 ) {
 163         z1 = a1;
 164         z0 = a0;
 165     }
 166     else if ( count < 64 ) {
 167         z1 = ( a0<<negCount ) | ( a1>>count );
 168         z0 = a0>>count;
 169     }
 170     else {
 171         z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
 172         z0 = 0;
 173     }
 174     *z1Ptr = z1;
 175     *z0Ptr = z0;
 176
 177 }
 178
 179 /*----------------------------------------------------------------------------
 180 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 181 | number of bits given in `count'.  If any nonzero bits are shifted off, they
 182 | are ``jammed'' into the least significant bit of the result by setting the
 183 | least significant bit to 1.  The value of `count' can be arbitrarily large;
 184 | in particular, if `count' is greater than 128, the result will be either
 185 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
 186 | nonzero.  The result is broken into two 64-bit pieces which are stored at
 187 | the locations pointed to by `z0Ptr' and `z1Ptr'.
 188 *----------------------------------------------------------------------------*/
 189
 190 INLINE void
 191  shift128RightJamming(
 192      uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 193 {
 194     uint64_t z0, z1;
 195     int8 negCount = ( - count ) & 63;
 196
 197     if ( count == 0 ) {
 198         z1 = a1;
 199         z0 = a0;
 200     }
 201     else if ( count < 64 ) {
 202         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
 203         z0 = a0>>count;
 204     }
 205     else {
 206         if ( count == 64 ) {
 207             z1 = a0 | ( a1 != 0 );
 208         }
 209         else if ( count < 128 ) {
 210             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
 211         }
 212         else {
 213             z1 = ( ( a0 | a1 ) != 0 );
 214         }
 215         z0 = 0;
 216     }
 217     *z1Ptr = z1;
 218     *z0Ptr = z0;
 219
 220 }
 221
 222 /*----------------------------------------------------------------------------
 223 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
 224 | by 64 _plus_ the number of bits given in `count'.  The shifted result is
 225 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
 226 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
 227 | off form a third 64-bit result as follows:  The _last_ bit shifted off is
 228 | the most-significant bit of the extra result, and the other 63 bits of the
 229 | extra result are all zero if and only if _all_but_the_last_ bits shifted off
 230 | were all zero.  This extra result is stored in the location pointed to by
 231 | `z2Ptr'.  The value of `count' can be arbitrarily large.
 232 |     (This routine makes more sense if `a0', `a1', and `a2' are considered
 233 | to form a fixed-point value with binary point between `a1' and `a2'.  This
 234 | fixed-point value is shifted right by the number of bits given in `count',
 235 | and the integer part of the result is returned at the locations pointed to
 236 | by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
 237 | corrupted as described above, and is returned at the location pointed to by
 238 | `z2Ptr'.)
 239 *----------------------------------------------------------------------------*/
 240
 241 INLINE void
 242  shift128ExtraRightJamming(
 243      uint64_t a0,
 244      uint64_t a1,
 245      uint64_t a2,
 246      int_fast16_t count,
 247      uint64_t *z0Ptr,
 248      uint64_t *z1Ptr,
 249      uint64_t *z2Ptr
 250  )
 251 {
 252     uint64_t z0, z1, z2;
 253     int8 negCount = ( - count ) & 63;
 254
 255     if ( count == 0 ) {
 256         z2 = a2;
 257         z1 = a1;
 258         z0 = a0;
 259     }
 260     else {
 261         if ( count < 64 ) {
 262             z2 = a1<<negCount;
 263             z1 = ( a0<<negCount ) | ( a1>>count );
 264             z0 = a0>>count;
 265         }
 266         else {
 267             if ( count == 64 ) {
 268                 z2 = a1;
 269                 z1 = a0;
 270             }
 271             else {
 272                 a2 |= a1;
 273                 if ( count < 128 ) {
 274                     z2 = a0<<negCount;
 275                     z1 = a0>>( count & 63 );
 276                 }
 277                 else {
 278                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
 279                     z1 = 0;
 280                 }
 281             }
 282             z0 = 0;
 283         }
 284         z2 |= ( a2 != 0 );
 285     }
 286     *z2Ptr = z2;
 287     *z1Ptr = z1;
 288     *z0Ptr = z0;
 289
 290 }
 291
 292 /*----------------------------------------------------------------------------
 293 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
 294 | number of bits given in `count'.  Any bits shifted off are lost.  The value
 295 | of `count' must be less than 64.  The result is broken into two 64-bit
 296 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 297 *----------------------------------------------------------------------------*/
 298
 299 INLINE void
 300  shortShift128Left(
 301      uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 302 {
 303
 304     *z1Ptr = a1<<count;
 305     *z0Ptr =
 306         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
 307
 308 }
 309
 310 /*----------------------------------------------------------------------------
 311 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
 312 | by the number of bits given in `count'.  Any bits shifted off are lost.
 313 | The value of `count' must be less than 64.  The result is broken into three
 314 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
 315 | `z1Ptr', and `z2Ptr'.
 316 *----------------------------------------------------------------------------*/
 317
 318 INLINE void
 319  shortShift192Left(
 320      uint64_t a0,
 321      uint64_t a1,
 322      uint64_t a2,
 323      int_fast16_t count,
 324      uint64_t *z0Ptr,
 325      uint64_t *z1Ptr,
 326      uint64_t *z2Ptr
 327  )
 328 {
 329     uint64_t z0, z1, z2;
 330     int8 negCount;
 331
 332     z2 = a2<<count;
 333     z1 = a1<<count;
 334     z0 = a0<<count;
 335     if ( 0 < count ) {
 336         negCount = ( ( - count ) & 63 );
 337         z1 |= a2>>negCount;
 338         z0 |= a1>>negCount;
 339     }
 340     *z2Ptr = z2;
 341     *z1Ptr = z1;
 342     *z0Ptr = z0;
 343
 344 }
 345
 346 /*----------------------------------------------------------------------------
 347 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
 348 | value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
 349 | any carry out is lost.  The result is broken into two 64-bit pieces which
 350 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 351 *----------------------------------------------------------------------------*/
 352
 353 INLINE void
 354  add128(
 355      uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
 356 {
 357     uint64_t z1;
 358
 359     z1 = a1 + b1;
 360     *z1Ptr = z1;
 361     *z0Ptr = a0 + b0 + ( z1 < a1 );
 362
 363 }
 364
 365 /*----------------------------------------------------------------------------
 366 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
 367 | 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
 368 | modulo 2^192, so any carry out is lost.  The result is broken into three
 369 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
 370 | `z1Ptr', and `z2Ptr'.
 371 *----------------------------------------------------------------------------*/
 372
 373 INLINE void
 374  add192(
 375      uint64_t a0,
 376      uint64_t a1,
 377      uint64_t a2,
 378      uint64_t b0,
 379      uint64_t b1,
 380      uint64_t b2,
 381      uint64_t *z0Ptr,
 382      uint64_t *z1Ptr,
 383      uint64_t *z2Ptr
 384  )
 385 {
 386     uint64_t z0, z1, z2;
 387     int8 carry0, carry1;
 388
 389     z2 = a2 + b2;
 390     carry1 = ( z2 < a2 );
 391     z1 = a1 + b1;
 392     carry0 = ( z1 < a1 );
 393     z0 = a0 + b0;
 394     z1 += carry1;
 395     z0 += ( z1 < carry1 );
 396     z0 += carry0;
 397     *z2Ptr = z2;
 398     *z1Ptr = z1;
 399     *z0Ptr = z0;
 400
 401 }
 402
 403 /*----------------------------------------------------------------------------
 404 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
 405 | 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
 406 | 2^128, so any borrow out (carry out) is lost.  The result is broken into two
 407 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
 408 | `z1Ptr'.
 409 *----------------------------------------------------------------------------*/
 410
 411 INLINE void
 412  sub128(
 413      uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
 414 {
 415
 416     *z1Ptr = a1 - b1;
 417     *z0Ptr = a0 - b0 - ( a1 < b1 );
 418
 419 }
 420
 421 /*----------------------------------------------------------------------------
 422 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
 423 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
 424 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
 425 | result is broken into three 64-bit pieces which are stored at the locations
 426 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
 427 *----------------------------------------------------------------------------*/
 428
 429 INLINE void
 430  sub192(
 431      uint64_t a0,
 432      uint64_t a1,
 433      uint64_t a2,
 434      uint64_t b0,
 435      uint64_t b1,
 436      uint64_t b2,
 437      uint64_t *z0Ptr,
 438      uint64_t *z1Ptr,
 439      uint64_t *z2Ptr
 440  )
 441 {
 442     uint64_t z0, z1, z2;
 443     int8 borrow0, borrow1;
 444
 445     z2 = a2 - b2;
 446     borrow1 = ( a2 < b2 );
 447     z1 = a1 - b1;
 448     borrow0 = ( a1 < b1 );
 449     z0 = a0 - b0;
 450     z0 -= ( z1 < borrow1 );
 451     z1 -= borrow1;
 452     z0 -= borrow0;
 453     *z2Ptr = z2;
 454     *z1Ptr = z1;
 455     *z0Ptr = z0;
 456
 457 }
 458
 459 /*----------------------------------------------------------------------------
 460 | Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
 461 | into two 64-bit pieces which are stored at the locations pointed to by
 462 | `z0Ptr' and `z1Ptr'.
 463 *----------------------------------------------------------------------------*/
 464
 465 INLINE void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
 466 {
 467     uint32_t aHigh, aLow, bHigh, bLow;
 468     uint64_t z0, zMiddleA, zMiddleB, z1;
 469
 470     aLow = a;
 471     aHigh = a>>32;
 472     bLow = b;
 473     bHigh = b>>32;
 474     z1 = ( (uint64_t) aLow ) * bLow;
 475     zMiddleA = ( (uint64_t) aLow ) * bHigh;
 476     zMiddleB = ( (uint64_t) aHigh ) * bLow;
 477     z0 = ( (uint64_t) aHigh ) * bHigh;
 478     zMiddleA += zMiddleB;
 479     z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
 480     zMiddleA <<= 32;
 481     z1 += zMiddleA;
 482     z0 += ( z1 < zMiddleA );
 483     *z1Ptr = z1;
 484     *z0Ptr = z0;
 485
 486 }
 487
 488 /*----------------------------------------------------------------------------
 489 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
 490 | `b' to obtain a 192-bit product.  The product is broken into three 64-bit
 491 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
 492 | `z2Ptr'.
 493 *----------------------------------------------------------------------------*/
 494
 495 INLINE void
 496  mul128By64To192(
 497      uint64_t a0,
 498      uint64_t a1,
 499      uint64_t b,
 500      uint64_t *z0Ptr,
 501      uint64_t *z1Ptr,
 502      uint64_t *z2Ptr
 503  )
 504 {
 505     uint64_t z0, z1, z2, more1;
 506
 507     mul64To128( a1, b, &z1, &z2 );
 508     mul64To128( a0, b, &z0, &more1 );
 509     add128( z0, more1, 0, z1, &z0, &z1 );
 510     *z2Ptr = z2;
 511     *z1Ptr = z1;
 512     *z0Ptr = z0;
 513
 514 }
 515
 516 /*----------------------------------------------------------------------------
 517 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
 518 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
 519 | product.  The product is broken into four 64-bit pieces which are stored at
 520 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 521 *----------------------------------------------------------------------------*/
 522
 523 INLINE void
 524  mul128To256(
 525      uint64_t a0,
 526      uint64_t a1,
 527      uint64_t b0,
 528      uint64_t b1,
 529      uint64_t *z0Ptr,
 530      uint64_t *z1Ptr,
 531      uint64_t *z2Ptr,
 532      uint64_t *z3Ptr
 533  )
 534 {
 535     uint64_t z0, z1, z2, z3;
 536     uint64_t more1, more2;
 537
 538     mul64To128( a1, b1, &z2, &z3 );
 539     mul64To128( a1, b0, &z1, &more2 );
 540     add128( z1, more2, 0, z2, &z1, &z2 );
 541     mul64To128( a0, b0, &z0, &more1 );
 542     add128( z0, more1, 0, z1, &z0, &z1 );
 543     mul64To128( a0, b1, &more1, &more2 );
 544     add128( more1, more2, 0, z2, &more1, &z2 );
 545     add128( z0, z1, 0, more1, &z0, &z1 );
 546     *z3Ptr = z3;
 547     *z2Ptr = z2;
 548     *z1Ptr = z1;
 549     *z0Ptr = z0;
 550
 551 }
 552
 553 /*----------------------------------------------------------------------------
 554 | Returns an approximation to the 64-bit integer quotient obtained by dividing
 555 | `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
 556 | divisor `b' must be at least 2^63.  If q is the exact quotient truncated
 557 | toward zero, the approximation returned lies between q and q + 2 inclusive.
 558 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
 559 | unsigned integer is returned.
 560 *----------------------------------------------------------------------------*/
 561
 562 static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
 563 {
 564     uint64_t b0, b1;
 565     uint64_t rem0, rem1, term0, term1;
 566     uint64_t z;
 567
 568     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
 569     b0 = b>>32;
 570     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
 571     mul64To128( b, z, &term0, &term1 );
 572     sub128( a0, a1, term0, term1, &rem0, &rem1 );
 573     while ( ( (int64_t) rem0 ) < 0 ) {
 574         z -= LIT64( 0x100000000 );
 575         b1 = b<<32;
 576         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
 577     }
 578     rem0 = ( rem0<<32 ) | ( rem1>>32 );
 579     z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
 580     return z;
 581
 582 }
 583
 584 /*----------------------------------------------------------------------------
 585 | Returns an approximation to the square root of the 32-bit significand given
 586 | by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
 587 | `aExp' (the least significant bit) is 1, the integer returned approximates
 588 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
 589 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
 590 | case, the approximation returned lies strictly within +/-2 of the exact
 591 | value.
 592 *----------------------------------------------------------------------------*/
 593
 594 static uint32_t estimateSqrt32(int_fast16_t aExp, uint32_t a)
 595 {
 596     static const uint16_t sqrtOddAdjustments[] = {
 597         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
 598         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
 599     };
 600     static const uint16_t sqrtEvenAdjustments[] = {
 601         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
 602         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
 603     };
 604     int8 index;
 605     uint32_t z;
 606
 607     index = ( a>>27 ) & 15;
 608     if ( aExp & 1 ) {
 609         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
 610         z = ( ( a / z )<<14 ) + ( z<<15 );
 611         a >>= 1;
 612     }
 613     else {
 614         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
 615         z = a / z + z;
 616         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
 617         if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
 618     }
 619     return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
 620
 621 }
 622
 623 /*----------------------------------------------------------------------------
 624 | Returns the number of leading 0 bits before the most-significant 1 bit of
 625 | `a'.  If `a' is zero, 32 is returned.
 626 *----------------------------------------------------------------------------*/
 627
 628 static int8 countLeadingZeros32( uint32_t a )
 629 {
 630 #if SOFTFLOAT_GNUC_PREREQ(3, 4)
 631     if (a) {
 632         return __builtin_clz(a);
 633     } else {
 634         return 32;
 635     }
 636 #else
 637     static const int8 countLeadingZerosHigh[] = {
 638         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
 639         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 640         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 641         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 642         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 643         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 644         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 645         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 646         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 647         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 648         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 649         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 650         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 651         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 652         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 653         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 654     };
 655     int8 shiftCount;
 656
 657     shiftCount = 0;
 658     if ( a < 0x10000 ) {
 659         shiftCount += 16;
 660         a <<= 16;
 661     }
 662     if ( a < 0x1000000 ) {
 663         shiftCount += 8;
 664         a <<= 8;
 665     }
 666     shiftCount += countLeadingZerosHigh[ a>>24 ];
 667     return shiftCount;
 668 #endif
 669 }
 670
 671 /*----------------------------------------------------------------------------
 672 | Returns the number of leading 0 bits before the most-significant 1 bit of
 673 | `a'.  If `a' is zero, 64 is returned.
 674 *----------------------------------------------------------------------------*/
 675
 676 static int8 countLeadingZeros64( uint64_t a )
 677 {
 678 #if SOFTFLOAT_GNUC_PREREQ(3, 4)
 679     if (a) {
 680         return __builtin_clzll(a);
 681     } else {
 682         return 64;
 683     }
 684 #else
 685     int8 shiftCount;
 686
 687     shiftCount = 0;
 688     if ( a < ( (uint64_t) 1 )<<32 ) {
 689         shiftCount += 32;
 690     }
 691     else {
 692         a >>= 32;
 693     }
 694     shiftCount += countLeadingZeros32( a );
 695     return shiftCount;
 696 #endif
 697 }
 698
 699 /*----------------------------------------------------------------------------
 700 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
 701 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
 702 | Otherwise, returns 0.
 703 *----------------------------------------------------------------------------*/
 704
 705 INLINE flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 706 {
 707
 708     return ( a0 == b0 ) && ( a1 == b1 );
 709
 710 }
 711
 712 /*----------------------------------------------------------------------------
 713 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 714 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
 715 | Otherwise, returns 0.
 716 *----------------------------------------------------------------------------*/
 717
 718 INLINE flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 719 {
 720
 721     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
 722
 723 }
 724
 725 /*----------------------------------------------------------------------------
 726 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 727 | than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
 728 | returns 0.
 729 *----------------------------------------------------------------------------*/
 730
 731 INLINE flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 732 {
 733
 734     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
 735
 736 }
 737
 738 /*----------------------------------------------------------------------------
 739 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
 740 | not equal to the 128-bit value formed by concatenating `b0' and `b1'.
 741 | Otherwise, returns 0.
 742 *----------------------------------------------------------------------------*/
 743
 744 INLINE flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 745 {
 746
 747     return ( a0 != b0 ) || ( a1 != b1 );
 748
 749 }