arch/arm26/nwfpe/softfloat-macros

   1
   2 /*
   3 ===============================================================================
   4
   5 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
   6 Arithmetic Package, Release 2.
   7
   8 Written by John R. Hauser.  This work was made possible in part by the
   9 International Computer Science Institute, located at Suite 600, 1947 Center
  10 Street, Berkeley, California 94704.  Funding was partially provided by the
  11 National Science Foundation under grant MIP-9311980.  The original version
  12 of this code was written as part of a project to build a fixed-point vector
  13 processor in collaboration with the University of California at Berkeley,
  14 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  15 is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  16 arithmetic/softfloat.html'.
  17
  18 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  19 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  20 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  22 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  23
  24 Derivative works are acceptable, even for commercial purposes, so long as
  25 (1) they include prominent notice that the work is derivative, and (2) they
  26 include prominent notice akin to these three paragraphs for those parts of
  27 this code that are retained.
  28
  29 ===============================================================================
  30 */
  31
  32 /*
  33 -------------------------------------------------------------------------------
  34 Shifts `a' right by the number of bits given in `count'.  If any nonzero
  35 bits are shifted off, they are ``jammed'' into the least significant bit of
  36 the result by setting the least significant bit to 1.  The value of `count'
  37 can be arbitrarily large; in particular, if `count' is greater than 32, the
  38 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  39 The result is stored in the location pointed to by `zPtr'.
  40 -------------------------------------------------------------------------------
  41 */
  42 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
  43 {
  44     bits32 z;
  45     if ( count == 0 ) {
  46         z = a;
  47     }
  48     else if ( count < 32 ) {
  49         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
  50     }
  51     else {
  52         z = ( a != 0 );
  53     }
  54     *zPtr = z;
  55 }
  56
  57 /*
  58 -------------------------------------------------------------------------------
  59 Shifts `a' right by the number of bits given in `count'.  If any nonzero
  60 bits are shifted off, they are ``jammed'' into the least significant bit of
  61 the result by setting the least significant bit to 1.  The value of `count'
  62 can be arbitrarily large; in particular, if `count' is greater than 64, the
  63 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  64 The result is stored in the location pointed to by `zPtr'.
  65 -------------------------------------------------------------------------------
  66 */
  67 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
  68 {
  69     bits64 z;
  70
  71  __asm__("@shift64RightJamming -- start");
  72     if ( count == 0 ) {
  73         z = a;
  74     }
  75     else if ( count < 64 ) {
  76         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
  77     }
  78     else {
  79         z = ( a != 0 );
  80     }
  81  __asm__("@shift64RightJamming -- end");
  82     *zPtr = z;
  83 }
  84
  85 /*
  86 -------------------------------------------------------------------------------
  87 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
  88 _plus_ the number of bits given in `count'.  The shifted result is at most
  89 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
  90 bits shifted off form a second 64-bit result as follows:  The _last_ bit
  91 shifted off is the most-significant bit of the extra result, and the other
  92 63 bits of the extra result are all zero if and only if _all_but_the_last_
  93 bits shifted off were all zero.  This extra result is stored in the location
  94 pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
  95     (This routine makes more sense if `a0' and `a1' are considered to form a
  96 fixed-point value with binary point between `a0' and `a1'.  This fixed-point
  97 value is shifted right by the number of bits given in `count', and the
  98 integer part of the result is returned at the location pointed to by
  99 `z0Ptr'.  The fractional part of the result may be slightly corrupted as
 100 described above, and is returned at the location pointed to by `z1Ptr'.)
 101 -------------------------------------------------------------------------------
 102 */
 103 INLINE void
 104  shift64ExtraRightJamming(
 105      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
 106 {
 107     bits64 z0, z1;
 108     int8 negCount = ( - count ) & 63;
 109
 110     if ( count == 0 ) {
 111         z1 = a1;
 112         z0 = a0;
 113     }
 114     else if ( count < 64 ) {
 115         z1 = ( a0<<negCount ) | ( a1 != 0 );
 116         z0 = a0>>count;
 117     }
 118     else {
 119         if ( count == 64 ) {
 120             z1 = a0 | ( a1 != 0 );
 121         }
 122         else {
 123             z1 = ( ( a0 | a1 ) != 0 );
 124         }
 125         z0 = 0;
 126     }
 127     *z1Ptr = z1;
 128     *z0Ptr = z0;
 129
 130 }
 131
 132 /*
 133 -------------------------------------------------------------------------------
 134 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 135 number of bits given in `count'.  Any bits shifted off are lost.  The value
 136 of `count' can be arbitrarily large; in particular, if `count' is greater
 137 than 128, the result will be 0.  The result is broken into two 64-bit pieces
 138 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 139 -------------------------------------------------------------------------------
 140 */
 141 INLINE void
 142  shift128Right(
 143      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
 144 {
 145     bits64 z0, z1;
 146     int8 negCount = ( - count ) & 63;
 147
 148     if ( count == 0 ) {
 149         z1 = a1;
 150         z0 = a0;
 151     }
 152     else if ( count < 64 ) {
 153         z1 = ( a0<<negCount ) | ( a1>>count );
 154         z0 = a0>>count;
 155     }
 156     else {
 157         z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
 158         z0 = 0;
 159     }
 160     *z1Ptr = z1;
 161     *z0Ptr = z0;
 162
 163 }
 164
 165 /*
 166 -------------------------------------------------------------------------------
 167 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 168 number of bits given in `count'.  If any nonzero bits are shifted off, they
 169 are ``jammed'' into the least significant bit of the result by setting the
 170 least significant bit to 1.  The value of `count' can be arbitrarily large;
 171 in particular, if `count' is greater than 128, the result will be either 0
 172 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
 173 nonzero.  The result is broken into two 64-bit pieces which are stored at
 174 the locations pointed to by `z0Ptr' and `z1Ptr'.
 175 -------------------------------------------------------------------------------
 176 */
 177 INLINE void
 178  shift128RightJamming(
 179      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
 180 {
 181     bits64 z0, z1;
 182     int8 negCount = ( - count ) & 63;
 183
 184     if ( count == 0 ) {
 185         z1 = a1;
 186         z0 = a0;
 187     }
 188     else if ( count < 64 ) {
 189         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
 190         z0 = a0>>count;
 191     }
 192     else {
 193         if ( count == 64 ) {
 194             z1 = a0 | ( a1 != 0 );
 195         }
 196         else if ( count < 128 ) {
 197             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
 198         }
 199         else {
 200             z1 = ( ( a0 | a1 ) != 0 );
 201         }
 202         z0 = 0;
 203     }
 204     *z1Ptr = z1;
 205     *z0Ptr = z0;
 206
 207 }
 208
 209 /*
 210 -------------------------------------------------------------------------------
 211 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
 212 by 64 _plus_ the number of bits given in `count'.  The shifted result is
 213 at most 128 nonzero bits; these are broken into two 64-bit pieces which are
 214 stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
 215 off form a third 64-bit result as follows:  The _last_ bit shifted off is
 216 the most-significant bit of the extra result, and the other 63 bits of the
 217 extra result are all zero if and only if _all_but_the_last_ bits shifted off
 218 were all zero.  This extra result is stored in the location pointed to by
 219 `z2Ptr'.  The value of `count' can be arbitrarily large.
 220     (This routine makes more sense if `a0', `a1', and `a2' are considered
 221 to form a fixed-point value with binary point between `a1' and `a2'.  This
 222 fixed-point value is shifted right by the number of bits given in `count',
 223 and the integer part of the result is returned at the locations pointed to
 224 by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
 225 corrupted as described above, and is returned at the location pointed to by
 226 `z2Ptr'.)
 227 -------------------------------------------------------------------------------
 228 */
 229 INLINE void
 230  shift128ExtraRightJamming(
 231      bits64 a0,
 232      bits64 a1,
 233      bits64 a2,
 234      int16 count,
 235      bits64 *z0Ptr,
 236      bits64 *z1Ptr,
 237      bits64 *z2Ptr
 238  )
 239 {
 240     bits64 z0, z1, z2;
 241     int8 negCount = ( - count ) & 63;
 242
 243     if ( count == 0 ) {
 244         z2 = a2;
 245         z1 = a1;
 246         z0 = a0;
 247     }
 248     else {
 249         if ( count < 64 ) {
 250             z2 = a1<<negCount;
 251             z1 = ( a0<<negCount ) | ( a1>>count );
 252             z0 = a0>>count;
 253         }
 254         else {
 255             if ( count == 64 ) {
 256                 z2 = a1;
 257                 z1 = a0;
 258             }
 259             else {
 260                 a2 |= a1;
 261                 if ( count < 128 ) {
 262                     z2 = a0<<negCount;
 263                     z1 = a0>>( count & 63 );
 264                 }
 265                 else {
 266                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
 267                     z1 = 0;
 268                 }
 269             }
 270             z0 = 0;
 271         }
 272         z2 |= ( a2 != 0 );
 273     }
 274     *z2Ptr = z2;
 275     *z1Ptr = z1;
 276     *z0Ptr = z0;
 277
 278 }
 279
 280 /*
 281 -------------------------------------------------------------------------------
 282 Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
 283 number of bits given in `count'.  Any bits shifted off are lost.  The value
 284 of `count' must be less than 64.  The result is broken into two 64-bit
 285 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 286 -------------------------------------------------------------------------------
 287 */
 288 INLINE void
 289  shortShift128Left(
 290      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
 291 {
 292
 293     *z1Ptr = a1<<count;
 294     *z0Ptr =
 295         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
 296
 297 }
 298
 299 /*
 300 -------------------------------------------------------------------------------
 301 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
 302 by the number of bits given in `count'.  Any bits shifted off are lost.
 303 The value of `count' must be less than 64.  The result is broken into three
 304 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
 305 `z1Ptr', and `z2Ptr'.
 306 -------------------------------------------------------------------------------
 307 */
 308 INLINE void
 309  shortShift192Left(
 310      bits64 a0,
 311      bits64 a1,
 312      bits64 a2,
 313      int16 count,
 314      bits64 *z0Ptr,
 315      bits64 *z1Ptr,
 316      bits64 *z2Ptr
 317  )
 318 {
 319     bits64 z0, z1, z2;
 320     int8 negCount;
 321
 322     z2 = a2<<count;
 323     z1 = a1<<count;
 324     z0 = a0<<count;
 325     if ( 0 < count ) {
 326         negCount = ( ( - count ) & 63 );
 327         z1 |= a2>>negCount;
 328         z0 |= a1>>negCount;
 329     }
 330     *z2Ptr = z2;
 331     *z1Ptr = z1;
 332     *z0Ptr = z0;
 333
 334 }
 335
 336 /*
 337 -------------------------------------------------------------------------------
 338 Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
 339 value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
 340 any carry out is lost.  The result is broken into two 64-bit pieces which
 341 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 342 -------------------------------------------------------------------------------
 343 */
 344 INLINE void
 345  add128(
 346      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
 347 {
 348     bits64 z1;
 349
 350     z1 = a1 + b1;
 351     *z1Ptr = z1;
 352     *z0Ptr = a0 + b0 + ( z1 < a1 );
 353
 354 }
 355
 356 /*
 357 -------------------------------------------------------------------------------
 358 Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
 359 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
 360 modulo 2^192, so any carry out is lost.  The result is broken into three
 361 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
 362 `z1Ptr', and `z2Ptr'.
 363 -------------------------------------------------------------------------------
 364 */
 365 INLINE void
 366  add192(
 367      bits64 a0,
 368      bits64 a1,
 369      bits64 a2,
 370      bits64 b0,
 371      bits64 b1,
 372      bits64 b2,
 373      bits64 *z0Ptr,
 374      bits64 *z1Ptr,
 375      bits64 *z2Ptr
 376  )
 377 {
 378     bits64 z0, z1, z2;
 379     int8 carry0, carry1;
 380
 381     z2 = a2 + b2;
 382     carry1 = ( z2 < a2 );
 383     z1 = a1 + b1;
 384     carry0 = ( z1 < a1 );
 385     z0 = a0 + b0;
 386     z1 += carry1;
 387     z0 += ( z1 < carry1 );
 388     z0 += carry0;
 389     *z2Ptr = z2;
 390     *z1Ptr = z1;
 391     *z0Ptr = z0;
 392
 393 }
 394
 395 /*
 396 -------------------------------------------------------------------------------
 397 Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
 398 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
 399 2^128, so any borrow out (carry out) is lost.  The result is broken into two
 400 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
 401 `z1Ptr'.
 402 -------------------------------------------------------------------------------
 403 */
 404 INLINE void
 405  sub128(
 406      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
 407 {
 408
 409     *z1Ptr = a1 - b1;
 410     *z0Ptr = a0 - b0 - ( a1 < b1 );
 411
 412 }
 413
 414 /*
 415 -------------------------------------------------------------------------------
 416 Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
 417 from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
 418 Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
 419 result is broken into three 64-bit pieces which are stored at the locations
 420 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
 421 -------------------------------------------------------------------------------
 422 */
 423 INLINE void
 424  sub192(
 425      bits64 a0,
 426      bits64 a1,
 427      bits64 a2,
 428      bits64 b0,
 429      bits64 b1,
 430      bits64 b2,
 431      bits64 *z0Ptr,
 432      bits64 *z1Ptr,
 433      bits64 *z2Ptr
 434  )
 435 {
 436     bits64 z0, z1, z2;
 437     int8 borrow0, borrow1;
 438
 439     z2 = a2 - b2;
 440     borrow1 = ( a2 < b2 );
 441     z1 = a1 - b1;
 442     borrow0 = ( a1 < b1 );
 443     z0 = a0 - b0;
 444     z0 -= ( z1 < borrow1 );
 445     z1 -= borrow1;
 446     z0 -= borrow0;
 447     *z2Ptr = z2;
 448     *z1Ptr = z1;
 449     *z0Ptr = z0;
 450
 451 }
 452
 453 /*
 454 -------------------------------------------------------------------------------
 455 Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
 456 into two 64-bit pieces which are stored at the locations pointed to by
 457 `z0Ptr' and `z1Ptr'.
 458 -------------------------------------------------------------------------------
 459 */
 460 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
 461 {
 462     bits32 aHigh, aLow, bHigh, bLow;
 463     bits64 z0, zMiddleA, zMiddleB, z1;
 464
 465     aLow = a;
 466     aHigh = a>>32;
 467     bLow = b;
 468     bHigh = b>>32;
 469     z1 = ( (bits64) aLow ) * bLow;
 470     zMiddleA = ( (bits64) aLow ) * bHigh;
 471     zMiddleB = ( (bits64) aHigh ) * bLow;
 472     z0 = ( (bits64) aHigh ) * bHigh;
 473     zMiddleA += zMiddleB;
 474     z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
 475     zMiddleA <<= 32;
 476     z1 += zMiddleA;
 477     z0 += ( z1 < zMiddleA );
 478     *z1Ptr = z1;
 479     *z0Ptr = z0;
 480
 481 }
 482
 483 /*
 484 -------------------------------------------------------------------------------
 485 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
 486 obtain a 192-bit product.  The product is broken into three 64-bit pieces
 487 which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
 488 `z2Ptr'.
 489 -------------------------------------------------------------------------------
 490 */
 491 INLINE void
 492  mul128By64To192(
 493      bits64 a0,
 494      bits64 a1,
 495      bits64 b,
 496      bits64 *z0Ptr,
 497      bits64 *z1Ptr,
 498      bits64 *z2Ptr
 499  )
 500 {
 501     bits64 z0, z1, z2, more1;
 502
 503     mul64To128( a1, b, &z1, &z2 );
 504     mul64To128( a0, b, &z0, &more1 );
 505     add128( z0, more1, 0, z1, &z0, &z1 );
 506     *z2Ptr = z2;
 507     *z1Ptr = z1;
 508     *z0Ptr = z0;
 509
 510 }
 511
 512 /*
 513 -------------------------------------------------------------------------------
 514 Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
 515 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
 516 product.  The product is broken into four 64-bit pieces which are stored at
 517 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 518 -------------------------------------------------------------------------------
 519 */
 520 INLINE void
 521  mul128To256(
 522      bits64 a0,
 523      bits64 a1,
 524      bits64 b0,
 525      bits64 b1,
 526      bits64 *z0Ptr,
 527      bits64 *z1Ptr,
 528      bits64 *z2Ptr,
 529      bits64 *z3Ptr
 530  )
 531 {
 532     bits64 z0, z1, z2, z3;
 533     bits64 more1, more2;
 534
 535     mul64To128( a1, b1, &z2, &z3 );
 536     mul64To128( a1, b0, &z1, &more2 );
 537     add128( z1, more2, 0, z2, &z1, &z2 );
 538     mul64To128( a0, b0, &z0, &more1 );
 539     add128( z0, more1, 0, z1, &z0, &z1 );
 540     mul64To128( a0, b1, &more1, &more2 );
 541     add128( more1, more2, 0, z2, &more1, &z2 );
 542     add128( z0, z1, 0, more1, &z0, &z1 );
 543     *z3Ptr = z3;
 544     *z2Ptr = z2;
 545     *z1Ptr = z1;
 546     *z0Ptr = z0;
 547
 548 }
 549
 550 /*
 551 -------------------------------------------------------------------------------
 552 Returns an approximation to the 64-bit integer quotient obtained by dividing
 553 `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
 554 divisor `b' must be at least 2^63.  If q is the exact quotient truncated
 555 toward zero, the approximation returned lies between q and q + 2 inclusive.
 556 If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
 557 unsigned integer is returned.
 558 -------------------------------------------------------------------------------
 559 */
 560 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
 561 {
 562     bits64 b0, b1;
 563     bits64 rem0, rem1, term0, term1;
 564     bits64 z;
 565     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
 566     b0 = b>>32;
 567     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
 568     mul64To128( b, z, &term0, &term1 );
 569     sub128( a0, a1, term0, term1, &rem0, &rem1 );
 570     while ( ( (sbits64) rem0 ) < 0 ) {
 571         z -= LIT64( 0x100000000 );
 572         b1 = b<<32;
 573         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
 574     }
 575     rem0 = ( rem0<<32 ) | ( rem1>>32 );
 576     z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
 577     return z;
 578
 579 }
 580
 581 /*
 582 -------------------------------------------------------------------------------
 583 Returns an approximation to the square root of the 32-bit significand given
 584 by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
 585 `aExp' (the least significant bit) is 1, the integer returned approximates
 586 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
 587 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
 588 case, the approximation returned lies strictly within +/-2 of the exact
 589 value.
 590 -------------------------------------------------------------------------------
 591 */
 592 static bits32 estimateSqrt32( int16 aExp, bits32 a )
 593 {
 594     static const bits16 sqrtOddAdjustments[] = {
 595         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
 596         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
 597     };
 598     static const bits16 sqrtEvenAdjustments[] = {
 599         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
 600         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
 601     };
 602     int8 index;
 603     bits32 z;
 604
 605     index = ( a>>27 ) & 15;
 606     if ( aExp & 1 ) {
 607         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
 608         z = ( ( a / z )<<14 ) + ( z<<15 );
 609         a >>= 1;
 610     }
 611     else {
 612         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
 613         z = a / z + z;
 614         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
 615         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
 616     }
 617     return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
 618
 619 }
 620
 621 /*
 622 -------------------------------------------------------------------------------
 623 Returns the number of leading 0 bits before the most-significant 1 bit
 624 of `a'.  If `a' is zero, 32 is returned.
 625 -------------------------------------------------------------------------------
 626 */
 627 static int8 countLeadingZeros32( bits32 a )
 628 {
 629     static const int8 countLeadingZerosHigh[] = {
 630         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
 631         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 632         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 633         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 634         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 635         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 636         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 637         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 638         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 639         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 640         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 641         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 642         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 643         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 644         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 645         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 646     };
 647     int8 shiftCount;
 648
 649     shiftCount = 0;
 650     if ( a < 0x10000 ) {
 651         shiftCount += 16;
 652         a <<= 16;
 653     }
 654     if ( a < 0x1000000 ) {
 655         shiftCount += 8;
 656         a <<= 8;
 657     }
 658     shiftCount += countLeadingZerosHigh[ a>>24 ];
 659     return shiftCount;
 660
 661 }
 662
 663 /*
 664 -------------------------------------------------------------------------------
 665 Returns the number of leading 0 bits before the most-significant 1 bit
 666 of `a'.  If `a' is zero, 64 is returned.
 667 -------------------------------------------------------------------------------
 668 */
 669 static int8 countLeadingZeros64( bits64 a )
 670 {
 671     int8 shiftCount;
 672
 673     shiftCount = 0;
 674     if ( a < ( (bits64) 1 )<<32 ) {
 675         shiftCount += 32;
 676     }
 677     else {
 678         a >>= 32;
 679     }
 680     shiftCount += countLeadingZeros32( a );
 681     return shiftCount;
 682
 683 }
 684
 685 /*
 686 -------------------------------------------------------------------------------
 687 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
 688 is equal to the 128-bit value formed by concatenating `b0' and `b1'.
 689 Otherwise, returns 0.
 690 -------------------------------------------------------------------------------
 691 */
 692 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
 693 {
 694
 695     return ( a0 == b0 ) && ( a1 == b1 );
 696
 697 }
 698
 699 /*
 700 -------------------------------------------------------------------------------
 701 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 702 than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
 703 Otherwise, returns 0.
 704 -------------------------------------------------------------------------------
 705 */
 706 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
 707 {
 708
 709     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
 710
 711 }
 712
 713 /*
 714 -------------------------------------------------------------------------------
 715 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 716 than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
 717 returns 0.
 718 -------------------------------------------------------------------------------
 719 */
 720 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
 721 {
 722
 723     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
 724
 725 }
 726
 727 /*
 728 -------------------------------------------------------------------------------
 729 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
 730 not equal to the 128-bit value formed by concatenating `b0' and `b1'.
 731 Otherwise, returns 0.
 732 -------------------------------------------------------------------------------
 733 */
 734 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
 735 {
 736
 737     return ( a0 != b0 ) || ( a1 != b1 );
 738
 739 }
 740