compilers/pcc/pcc-libs-1.0.0/libsoftfloat/bits32/softfloat.c

   1 /* $NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $ */
   2
   3 /*
   4  * This version hacked for use with gcc -msoft-float by bjh21.
   5  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
   6  *  itself).
   7  */
   8
   9 /*
  10  * Things you may want to define:
  11  *
  12  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
  13  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
  14  *   properly renamed.
  15  */
  16
  17 /*
  18  * This differs from the standard bits32/softfloat.c in that float64
  19  * is defined to be a 64-bit integer rather than a structure.  The
  20  * structure is float64s, with translation between the two going via
  21  * float64u.
  22  */
  23
  24 /*
  25 ===============================================================================
  26
  27 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
  28 Arithmetic Package, Release 2a.
  29
  30 Written by John R. Hauser.  This work was made possible in part by the
  31 International Computer Science Institute, located at Suite 600, 1947 Center
  32 Street, Berkeley, California 94704.  Funding was partially provided by the
  33 National Science Foundation under grant MIP-9311980.  The original version
  34 of this code was written as part of a project to build a fixed-point vector
  35 processor in collaboration with the University of California at Berkeley,
  36 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  37 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  38 arithmetic/SoftFloat.html'.
  39
  40 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  41 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  42 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  43 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  44 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  45
  46 Derivative works are acceptable, even for commercial purposes, so long as
  47 (1) they include prominent notice that the work is derivative, and (2) they
  48 include prominent notice akin to these four paragraphs for those parts of
  49 this code that are retained.
  50
  51 ===============================================================================
  52 */
  53
  54 #include <sys/cdefs.h>
  55 #if defined(LIBC_SCCS) && !defined(lint)
  56 __RCSID("$NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $");
  57 #endif /* LIBC_SCCS and not lint */
  58
  59 #ifdef SOFTFLOAT_FOR_GCC
  60 #include "softfloat-for-gcc.h"
  61 #endif
  62
  63 #include "milieu.h"
  64 #include "softfloat.h"
  65
  66 /*
  67  * Conversions between floats as stored in memory and floats as
  68  * SoftFloat uses them
  69  */
  70 #ifndef FLOAT64_DEMANGLE
  71 #define FLOAT64_DEMANGLE(a)     (a)
  72 #endif
  73 #ifndef FLOAT64_MANGLE
  74 #define FLOAT64_MANGLE(a)       (a)
  75 #endif
  76
  77 /*
  78 -------------------------------------------------------------------------------
  79 Floating-point rounding mode and exception flags.
  80 -------------------------------------------------------------------------------
  81 */
  82 fp_rnd float_rounding_mode = float_round_nearest_even;
  83 fp_except float_exception_flags = 0;
  84
  85 /*
  86 -------------------------------------------------------------------------------
  87 Primitive arithmetic functions, including multi-word arithmetic, and
  88 division and square root approximations.  (Can be specialized to target if
  89 desired.)
  90 -------------------------------------------------------------------------------
  91 */
  92 #include "softfloat-macros"
  93
  94 /*
  95 -------------------------------------------------------------------------------
  96 Functions and definitions to determine:  (1) whether tininess for underflow
  97 is detected before or after rounding by default, (2) what (if anything)
  98 happens when exceptions are raised, (3) how signaling NaNs are distinguished
  99 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
 100 are propagated from function inputs to output.  These details are target-
 101 specific.
 102 -------------------------------------------------------------------------------
 103 */
 104 #include "softfloat-specialize"
 105
 106 /*
 107 -------------------------------------------------------------------------------
 108 Returns the fraction bits of the single-precision floating-point value `a'.
 109 -------------------------------------------------------------------------------
 110 */
 111 INLINE bits32 extractFloat32Frac( float32 a )
 112 {
 113
 114     return a & 0x007FFFFF;
 115
 116 }
 117
 118 /*
 119 -------------------------------------------------------------------------------
 120 Returns the exponent bits of the single-precision floating-point value `a'.
 121 -------------------------------------------------------------------------------
 122 */
 123 INLINE int16 extractFloat32Exp( float32 a )
 124 {
 125
 126     return ( a>>23 ) & 0xFF;
 127
 128 }
 129
 130 /*
 131 -------------------------------------------------------------------------------
 132 Returns the sign bit of the single-precision floating-point value `a'.
 133 -------------------------------------------------------------------------------
 134 */
 135 INLINE flag extractFloat32Sign( float32 a )
 136 {
 137
 138     return a>>31;
 139
 140 }
 141
 142 /*
 143 -------------------------------------------------------------------------------
 144 Normalizes the subnormal single-precision floating-point value represented
 145 by the denormalized significand `aSig'.  The normalized exponent and
 146 significand are stored at the locations pointed to by `zExpPtr' and
 147 `zSigPtr', respectively.
 148 -------------------------------------------------------------------------------
 149 */
 150 static void
 151  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
 152 {
 153     int8 shiftCount;
 154
 155     shiftCount = countLeadingZeros32( aSig ) - 8;
 156     *zSigPtr = aSig<<shiftCount;
 157     *zExpPtr = 1 - shiftCount;
 158
 159 }
 160
 161 /*
 162 -------------------------------------------------------------------------------
 163 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
 164 single-precision floating-point value, returning the result.  After being
 165 shifted into the proper positions, the three fields are simply added
 166 together to form the result.  This means that any integer portion of `zSig'
 167 will be added into the exponent.  Since a properly normalized significand
 168 will have an integer portion equal to 1, the `zExp' input should be 1 less
 169 than the desired result exponent whenever `zSig' is a complete, normalized
 170 significand.
 171 -------------------------------------------------------------------------------
 172 */
 173 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
 174 {
 175
 176     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
 177
 178 }
 179
 180 /*
 181 -------------------------------------------------------------------------------
 182 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 183 and significand `zSig', and returns the proper single-precision floating-
 184 point value corresponding to the abstract input.  Ordinarily, the abstract
 185 value is simply rounded and packed into the single-precision format, with
 186 the inexact exception raised if the abstract input cannot be represented
 187 exactly.  However, if the abstract value is too large, the overflow and
 188 inexact exceptions are raised and an infinity or maximal finite value is
 189 returned.  If the abstract value is too small, the input value is rounded to
 190 a subnormal number, and the underflow and inexact exceptions are raised if
 191 the abstract input cannot be represented exactly as a subnormal single-
 192 precision floating-point number.
 193     The input significand `zSig' has its binary point between bits 30
 194 and 29, which is 7 bits to the left of the usual location.  This shifted
 195 significand must be normalized or smaller.  If `zSig' is not normalized,
 196 `zExp' must be 0; in that case, the result returned is a subnormal number,
 197 and it must not require rounding.  In the usual case that `zSig' is
 198 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
 199 The handling of underflow and overflow follows the IEC/IEEE Standard for
 200 Binary Floating-Point Arithmetic.
 201 -------------------------------------------------------------------------------
 202 */
 203 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
 204 {
 205     int8 roundingMode;
 206     flag roundNearestEven;
 207     int8 roundIncrement, roundBits;
 208     flag isTiny;
 209
 210     roundingMode = float_rounding_mode;
 211     roundNearestEven = roundingMode == float_round_nearest_even;
 212     roundIncrement = 0x40;
 213     if ( ! roundNearestEven ) {
 214         if ( roundingMode == float_round_to_zero ) {
 215             roundIncrement = 0;
 216         }
 217         else {
 218             roundIncrement = 0x7F;
 219             if ( zSign ) {
 220                 if ( roundingMode == float_round_up ) roundIncrement = 0;
 221             }
 222             else {
 223                 if ( roundingMode == float_round_down ) roundIncrement = 0;
 224             }
 225         }
 226     }
 227     roundBits = zSig & 0x7F;
 228     if ( 0xFD <= (bits16) zExp ) {
 229         if (    ( 0xFD < zExp )
 230              || (    ( zExp == 0xFD )
 231                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
 232            ) {
 233             float_raise( float_flag_overflow | float_flag_inexact );
 234             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
 235         }
 236         if ( zExp < 0 ) {
 237             isTiny =
 238                    ( float_detect_tininess == float_tininess_before_rounding )
 239                 || ( zExp < -1 )
 240                 || ( zSig + roundIncrement < 0x80000000 );
 241             shift32RightJamming( zSig, - zExp, &zSig );
 242             zExp = 0;
 243             roundBits = zSig & 0x7F;
 244             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
 245         }
 246     }
 247     if ( roundBits ) float_exception_flags |= float_flag_inexact;
 248     zSig = ( zSig + roundIncrement )>>7;
 249     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
 250     if ( zSig == 0 ) zExp = 0;
 251     return packFloat32( zSign, zExp, zSig );
 252
 253 }
 254
 255 /*
 256 -------------------------------------------------------------------------------
 257 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 258 and significand `zSig', and returns the proper single-precision floating-
 259 point value corresponding to the abstract input.  This routine is just like
 260 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
 261 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
 262 floating-point exponent.
 263 -------------------------------------------------------------------------------
 264 */
 265 static float32
 266  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
 267 {
 268     int8 shiftCount;
 269
 270     shiftCount = countLeadingZeros32( zSig ) - 1;
 271     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
 272
 273 }
 274
 275 /*
 276 -------------------------------------------------------------------------------
 277 Returns the least-significant 32 fraction bits of the double-precision
 278 floating-point value `a'.
 279 -------------------------------------------------------------------------------
 280 */
 281 INLINE bits32 extractFloat64Frac1( float64 a )
 282 {
 283
 284     return FLOAT64_DEMANGLE(a) & LIT64( 0x00000000FFFFFFFF );
 285
 286 }
 287
 288 /*
 289 -------------------------------------------------------------------------------
 290 Returns the most-significant 20 fraction bits of the double-precision
 291 floating-point value `a'.
 292 -------------------------------------------------------------------------------
 293 */
 294 INLINE bits32 extractFloat64Frac0( float64 a )
 295 {
 296
 297     return ( FLOAT64_DEMANGLE(a)>>32 ) & 0x000FFFFF;
 298
 299 }
 300
 301 /*
 302 -------------------------------------------------------------------------------
 303 Returns the exponent bits of the double-precision floating-point value `a'.
 304 -------------------------------------------------------------------------------
 305 */
 306 INLINE int16 extractFloat64Exp( float64 a )
 307 {
 308
 309     return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
 310
 311 }
 312
 313 /*
 314 -------------------------------------------------------------------------------
 315 Returns the sign bit of the double-precision floating-point value `a'.
 316 -------------------------------------------------------------------------------
 317 */
 318 INLINE flag extractFloat64Sign( float64 a )
 319 {
 320
 321     return FLOAT64_DEMANGLE(a)>>63;
 322
 323 }
 324
 325 /*
 326 -------------------------------------------------------------------------------
 327 Normalizes the subnormal double-precision floating-point value represented
 328 by the denormalized significand formed by the concatenation of `aSig0' and
 329 `aSig1'.  The normalized exponent is stored at the location pointed to by
 330 `zExpPtr'.  The most significant 21 bits of the normalized significand are
 331 stored at the location pointed to by `zSig0Ptr', and the least significant
 332 32 bits of the normalized significand are stored at the location pointed to
 333 by `zSig1Ptr'.
 334 -------------------------------------------------------------------------------
 335 */
 336 static void
 337  normalizeFloat64Subnormal(
 338      bits32 aSig0,
 339      bits32 aSig1,
 340      int16 *zExpPtr,
 341      bits32 *zSig0Ptr,
 342      bits32 *zSig1Ptr
 343  )
 344 {
 345     int8 shiftCount;
 346
 347     if ( aSig0 == 0 ) {
 348         shiftCount = countLeadingZeros32( aSig1 ) - 11;
 349         if ( shiftCount < 0 ) {
 350             *zSig0Ptr = aSig1>>( - shiftCount );
 351             *zSig1Ptr = aSig1<<( shiftCount & 31 );
 352         }
 353         else {
 354             *zSig0Ptr = aSig1<<shiftCount;
 355             *zSig1Ptr = 0;
 356         }
 357         *zExpPtr = - shiftCount - 31;
 358     }
 359     else {
 360         shiftCount = countLeadingZeros32( aSig0 ) - 11;
 361         shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
 362         *zExpPtr = 1 - shiftCount;
 363     }
 364
 365 }
 366
 367 /*
 368 -------------------------------------------------------------------------------
 369 Packs the sign `zSign', the exponent `zExp', and the significand formed by
 370 the concatenation of `zSig0' and `zSig1' into a double-precision floating-
 371 point value, returning the result.  After being shifted into the proper
 372 positions, the three fields `zSign', `zExp', and `zSig0' are simply added
 373 together to form the most significant 32 bits of the result.  This means
 374 that any integer portion of `zSig0' will be added into the exponent.  Since
 375 a properly normalized significand will have an integer portion equal to 1,
 376 the `zExp' input should be 1 less than the desired result exponent whenever
 377 `zSig0' and `zSig1' concatenated form a complete, normalized significand.
 378 -------------------------------------------------------------------------------
 379 */
 380 INLINE float64
 381  packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
 382 {
 383
 384     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
 385                            ( ( (bits64) zExp )<<52 ) +
 386                            ( ( (bits64) zSig0 )<<32 ) + zSig1 );
 387
 388
 389 }
 390
 391 /*
 392 -------------------------------------------------------------------------------
 393 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 394 and extended significand formed by the concatenation of `zSig0', `zSig1',
 395 and `zSig2', and returns the proper double-precision floating-point value
 396 corresponding to the abstract input.  Ordinarily, the abstract value is
 397 simply rounded and packed into the double-precision format, with the inexact
 398 exception raised if the abstract input cannot be represented exactly.
 399 However, if the abstract value is too large, the overflow and inexact
 400 exceptions are raised and an infinity or maximal finite value is returned.
 401 If the abstract value is too small, the input value is rounded to a
 402 subnormal number, and the underflow and inexact exceptions are raised if the
 403 abstract input cannot be represented exactly as a subnormal double-precision
 404 floating-point number.
 405     The input significand must be normalized or smaller.  If the input
 406 significand is not normalized, `zExp' must be 0; in that case, the result
 407 returned is a subnormal number, and it must not require rounding.  In the
 408 usual case that the input significand is normalized, `zExp' must be 1 less
 409 than the ``true'' floating-point exponent.  The handling of underflow and
 410 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 411 -------------------------------------------------------------------------------
 412 */
 413 static float64
 414  roundAndPackFloat64(
 415      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
 416 {
 417     int8 roundingMode;
 418     flag roundNearestEven, increment, isTiny;
 419
 420     roundingMode = float_rounding_mode;
 421     roundNearestEven = ( roundingMode == float_round_nearest_even );
 422     increment = ( (sbits32) zSig2 < 0 );
 423     if ( ! roundNearestEven ) {
 424         if ( roundingMode == float_round_to_zero ) {
 425             increment = 0;
 426         }
 427         else {
 428             if ( zSign ) {
 429                 increment = ( roundingMode == float_round_down ) && zSig2;
 430             }
 431             else {
 432                 increment = ( roundingMode == float_round_up ) && zSig2;
 433             }
 434         }
 435     }
 436     if ( 0x7FD <= (bits16) zExp ) {
 437         if (    ( 0x7FD < zExp )
 438              || (    ( zExp == 0x7FD )
 439                   && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
 440                   && increment
 441                 )
 442            ) {
 443             float_raise( float_flag_overflow | float_flag_inexact );
 444             if (    ( roundingMode == float_round_to_zero )
 445                  || ( zSign && ( roundingMode == float_round_up ) )
 446                  || ( ! zSign && ( roundingMode == float_round_down ) )
 447                ) {
 448                 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
 449             }
 450             return packFloat64( zSign, 0x7FF, 0, 0 );
 451         }
 452         if ( zExp < 0 ) {
 453             isTiny =
 454                    ( float_detect_tininess == float_tininess_before_rounding )
 455                 || ( zExp < -1 )
 456                 || ! increment
 457                 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
 458             shift64ExtraRightJamming(
 459                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
 460             zExp = 0;
 461             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
 462             if ( roundNearestEven ) {
 463                 increment = ( (sbits32) zSig2 < 0 );
 464             }
 465             else {
 466                 if ( zSign ) {
 467                     increment = ( roundingMode == float_round_down ) && zSig2;
 468                 }
 469                 else {
 470                     increment = ( roundingMode == float_round_up ) && zSig2;
 471                 }
 472             }
 473         }
 474     }
 475     if ( zSig2 ) float_exception_flags |= float_flag_inexact;
 476     if ( increment ) {
 477         add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
 478         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
 479     }
 480     else {
 481         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
 482     }
 483     return packFloat64( zSign, zExp, zSig0, zSig1 );
 484
 485 }
 486
 487 /*
 488 -------------------------------------------------------------------------------
 489 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 490 and significand formed by the concatenation of `zSig0' and `zSig1', and
 491 returns the proper double-precision floating-point value corresponding
 492 to the abstract input.  This routine is just like `roundAndPackFloat64'
 493 except that the input significand has fewer bits and does not have to be
 494 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
 495 point exponent.
 496 -------------------------------------------------------------------------------
 497 */
 498 static float64
 499  normalizeRoundAndPackFloat64(
 500      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
 501 {
 502     int8 shiftCount;
 503     bits32 zSig2;
 504
 505     if ( zSig0 == 0 ) {
 506         zSig0 = zSig1;
 507         zSig1 = 0;
 508         zExp -= 32;
 509     }
 510     shiftCount = countLeadingZeros32( zSig0 ) - 11;
 511     if ( 0 <= shiftCount ) {
 512         zSig2 = 0;
 513         shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
 514     }
 515     else {
 516         shift64ExtraRightJamming(
 517             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
 518     }
 519     zExp -= shiftCount;
 520     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
 521
 522 }
 523
 524 /*
 525 -------------------------------------------------------------------------------
 526 Returns the result of converting the 32-bit two's complement integer `a' to
 527 the single-precision floating-point format.  The conversion is performed
 528 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 529 -------------------------------------------------------------------------------
 530 */
 531 float32 int32_to_float32( int32 a )
 532 {
 533     flag zSign;
 534
 535     if ( a == 0 ) return 0;
 536     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
 537     zSign = ( a < 0 );
 538     return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
 539
 540 }
 541
 542 /*
 543 -------------------------------------------------------------------------------
 544 Returns the result of converting the 32-bit two's complement integer `a' to
 545 the double-precision floating-point format.  The conversion is performed
 546 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 547 -------------------------------------------------------------------------------
 548 */
 549 float64 int32_to_float64( int32 a )
 550 {
 551     flag zSign;
 552     bits32 absA;
 553     int8 shiftCount;
 554     bits32 zSig0, zSig1;
 555
 556     if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
 557     zSign = ( a < 0 );
 558     absA = zSign ? - a : a;
 559     shiftCount = countLeadingZeros32( absA ) - 11;
 560     if ( 0 <= shiftCount ) {
 561         zSig0 = absA<<shiftCount;
 562         zSig1 = 0;
 563     }
 564     else {
 565         shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
 566     }
 567     return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
 568
 569 }
 570
 571 #ifndef SOFTFLOAT_FOR_GCC
 572 /*
 573 -------------------------------------------------------------------------------
 574 Returns the result of converting the single-precision floating-point value
 575 `a' to the 32-bit two's complement integer format.  The conversion is
 576 performed according to the IEC/IEEE Standard for Binary Floating-Point
 577 Arithmetic---which means in particular that the conversion is rounded
 578 according to the current rounding mode.  If `a' is a NaN, the largest
 579 positive integer is returned.  Otherwise, if the conversion overflows, the
 580 largest integer with the same sign as `a' is returned.
 581 -------------------------------------------------------------------------------
 582 */
 583 int32 float32_to_int32( float32 a )
 584 {
 585     flag aSign;
 586     int16 aExp, shiftCount;
 587     bits32 aSig, aSigExtra;
 588     int32 z;
 589     int8 roundingMode;
 590
 591     aSig = extractFloat32Frac( a );
 592     aExp = extractFloat32Exp( a );
 593     aSign = extractFloat32Sign( a );
 594     shiftCount = aExp - 0x96;
 595     if ( 0 <= shiftCount ) {
 596         if ( 0x9E <= aExp ) {
 597             if ( a != 0xCF000000 ) {
 598                 float_raise( float_flag_invalid );
 599                 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
 600                     return 0x7FFFFFFF;
 601                 }
 602             }
 603             return (sbits32) 0x80000000;
 604         }
 605         z = ( aSig | 0x00800000 )<<shiftCount;
 606         if ( aSign ) z = - z;
 607     }
 608     else {
 609         if ( aExp < 0x7E ) {
 610             aSigExtra = aExp | aSig;
 611             z = 0;
 612         }
 613         else {
 614             aSig |= 0x00800000;
 615             aSigExtra = aSig<<( shiftCount & 31 );
 616             z = aSig>>( - shiftCount );
 617         }
 618         if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
 619         roundingMode = float_rounding_mode;
 620         if ( roundingMode == float_round_nearest_even ) {
 621             if ( (sbits32) aSigExtra < 0 ) {
 622                 ++z;
 623                 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
 624             }
 625             if ( aSign ) z = - z;
 626         }
 627         else {
 628             aSigExtra = ( aSigExtra != 0 );
 629             if ( aSign ) {
 630                 z += ( roundingMode == float_round_down ) & aSigExtra;
 631                 z = - z;
 632             }
 633             else {
 634                 z += ( roundingMode == float_round_up ) & aSigExtra;
 635             }
 636         }
 637     }
 638     return z;
 639
 640 }
 641 #endif
 642
 643 /*
 644 -------------------------------------------------------------------------------
 645 Returns the result of converting the single-precision floating-point value
 646 `a' to the 32-bit two's complement integer format.  The conversion is
 647 performed according to the IEC/IEEE Standard for Binary Floating-Point
 648 Arithmetic, except that the conversion is always rounded toward zero.
 649 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
 650 the conversion overflows, the largest integer with the same sign as `a' is
 651 returned.
 652 -------------------------------------------------------------------------------
 653 */
 654 int32 float32_to_int32_round_to_zero( float32 a )
 655 {
 656     flag aSign;
 657     int16 aExp, shiftCount;
 658     bits32 aSig;
 659     int32 z;
 660
 661     aSig = extractFloat32Frac( a );
 662     aExp = extractFloat32Exp( a );
 663     aSign = extractFloat32Sign( a );
 664     shiftCount = aExp - 0x9E;
 665     if ( 0 <= shiftCount ) {
 666         if ( a != 0xCF000000 ) {
 667             float_raise( float_flag_invalid );
 668             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
 669         }
 670         return (sbits32) 0x80000000;
 671     }
 672     else if ( aExp <= 0x7E ) {
 673         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
 674         return 0;
 675     }
 676     aSig = ( aSig | 0x00800000 )<<8;
 677     z = aSig>>( - shiftCount );
 678     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
 679         float_exception_flags |= float_flag_inexact;
 680     }
 681     if ( aSign ) z = - z;
 682     return z;
 683
 684 }
 685
 686 /*
 687 -------------------------------------------------------------------------------
 688 Returns the result of converting the single-precision floating-point value
 689 `a' to the double-precision floating-point format.  The conversion is
 690 performed according to the IEC/IEEE Standard for Binary Floating-Point
 691 Arithmetic.
 692 -------------------------------------------------------------------------------
 693 */
 694 float64 float32_to_float64( float32 a )
 695 {
 696     flag aSign;
 697     int16 aExp;
 698     bits32 aSig, zSig0, zSig1;
 699
 700     aSig = extractFloat32Frac( a );
 701     aExp = extractFloat32Exp( a );
 702     aSign = extractFloat32Sign( a );
 703     if ( aExp == 0xFF ) {
 704         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
 705         return packFloat64( aSign, 0x7FF, 0, 0 );
 706     }
 707     if ( aExp == 0 ) {
 708         if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
 709         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
 710         --aExp;
 711     }
 712     shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
 713     return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
 714
 715 }
 716
 717 #ifndef SOFTFLOAT_FOR_GCC
 718 /*
 719 -------------------------------------------------------------------------------
 720 Rounds the single-precision floating-point value `a' to an integer,
 721 and returns the result as a single-precision floating-point value.  The
 722 operation is performed according to the IEC/IEEE Standard for Binary
 723 Floating-Point Arithmetic.
 724 -------------------------------------------------------------------------------
 725 */
 726 float32 float32_round_to_int( float32 a )
 727 {
 728     flag aSign;
 729     int16 aExp;
 730     bits32 lastBitMask, roundBitsMask;
 731     int8 roundingMode;
 732     float32 z;
 733
 734     aExp = extractFloat32Exp( a );
 735     if ( 0x96 <= aExp ) {
 736         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
 737             return propagateFloat32NaN( a, a );
 738         }
 739         return a;
 740     }
 741     if ( aExp <= 0x7E ) {
 742         if ( (bits32) ( a<<1 ) == 0 ) return a;
 743         float_exception_flags |= float_flag_inexact;
 744         aSign = extractFloat32Sign( a );
 745         switch ( float_rounding_mode ) {
 746          case float_round_nearest_even:
 747             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
 748                 return packFloat32( aSign, 0x7F, 0 );
 749             }
 750             break;
 751          case float_round_to_zero:
 752             break;
 753          case float_round_down:
 754             return aSign ? 0xBF800000 : 0;
 755          case float_round_up:
 756             return aSign ? 0x80000000 : 0x3F800000;
 757         }
 758         return packFloat32( aSign, 0, 0 );
 759     }
 760     lastBitMask = 1;
 761     lastBitMask <<= 0x96 - aExp;
 762     roundBitsMask = lastBitMask - 1;
 763     z = a;
 764     roundingMode = float_rounding_mode;
 765     if ( roundingMode == float_round_nearest_even ) {
 766         z += lastBitMask>>1;
 767         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
 768     }
 769     else if ( roundingMode != float_round_to_zero ) {
 770         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
 771             z += roundBitsMask;
 772         }
 773     }
 774     z &= ~ roundBitsMask;
 775     if ( z != a ) float_exception_flags |= float_flag_inexact;
 776     return z;
 777
 778 }
 779 #endif
 780
 781 /*
 782 -------------------------------------------------------------------------------
 783 Returns the result of adding the absolute values of the single-precision
 784 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
 785 before being returned.  `zSign' is ignored if the result is a NaN.
 786 The addition is performed according to the IEC/IEEE Standard for Binary
 787 Floating-Point Arithmetic.
 788 -------------------------------------------------------------------------------
 789 */
 790 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
 791 {
 792     int16 aExp, bExp, zExp;
 793     bits32 aSig, bSig, zSig;
 794     int16 expDiff;
 795
 796     aSig = extractFloat32Frac( a );
 797     aExp = extractFloat32Exp( a );
 798     bSig = extractFloat32Frac( b );
 799     bExp = extractFloat32Exp( b );
 800     expDiff = aExp - bExp;
 801     aSig <<= 6;
 802     bSig <<= 6;
 803     if ( 0 < expDiff ) {
 804         if ( aExp == 0xFF ) {
 805             if ( aSig ) return propagateFloat32NaN( a, b );
 806             return a;
 807         }
 808         if ( bExp == 0 ) {
 809             --expDiff;
 810         }
 811         else {
 812             bSig |= 0x20000000;
 813         }
 814         shift32RightJamming( bSig, expDiff, &bSig );
 815         zExp = aExp;
 816     }
 817     else if ( expDiff < 0 ) {
 818         if ( bExp == 0xFF ) {
 819             if ( bSig ) return propagateFloat32NaN( a, b );
 820             return packFloat32( zSign, 0xFF, 0 );
 821         }
 822         if ( aExp == 0 ) {
 823             ++expDiff;
 824         }
 825         else {
 826             aSig |= 0x20000000;
 827         }
 828         shift32RightJamming( aSig, - expDiff, &aSig );
 829         zExp = bExp;
 830     }
 831     else {
 832         if ( aExp == 0xFF ) {
 833             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
 834             return a;
 835         }
 836         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
 837         zSig = 0x40000000 + aSig + bSig;
 838         zExp = aExp;
 839         goto roundAndPack;
 840     }
 841     aSig |= 0x20000000;
 842     zSig = ( aSig + bSig )<<1;
 843     --zExp;
 844     if ( (sbits32) zSig < 0 ) {
 845         zSig = aSig + bSig;
 846         ++zExp;
 847     }
 848  roundAndPack:
 849     return roundAndPackFloat32( zSign, zExp, zSig );
 850
 851 }
 852
 853 /*
 854 -------------------------------------------------------------------------------
 855 Returns the result of subtracting the absolute values of the single-
 856 precision floating-point values `a' and `b'.  If `zSign' is 1, the
 857 difference is negated before being returned.  `zSign' is ignored if the
 858 result is a NaN.  The subtraction is performed according to the IEC/IEEE
 859 Standard for Binary Floating-Point Arithmetic.
 860 -------------------------------------------------------------------------------
 861 */
 862 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
 863 {
 864     int16 aExp, bExp, zExp;
 865     bits32 aSig, bSig, zSig;
 866     int16 expDiff;
 867
 868     aSig = extractFloat32Frac( a );
 869     aExp = extractFloat32Exp( a );
 870     bSig = extractFloat32Frac( b );
 871     bExp = extractFloat32Exp( b );
 872     expDiff = aExp - bExp;
 873     aSig <<= 7;
 874     bSig <<= 7;
 875     if ( 0 < expDiff ) goto aExpBigger;
 876     if ( expDiff < 0 ) goto bExpBigger;
 877     if ( aExp == 0xFF ) {
 878         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
 879         float_raise( float_flag_invalid );
 880         return float32_default_nan;
 881     }
 882     if ( aExp == 0 ) {
 883         aExp = 1;
 884         bExp = 1;
 885     }
 886     if ( bSig < aSig ) goto aBigger;
 887     if ( aSig < bSig ) goto bBigger;
 888     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
 889  bExpBigger:
 890     if ( bExp == 0xFF ) {
 891         if ( bSig ) return propagateFloat32NaN( a, b );
 892         return packFloat32( zSign ^ 1, 0xFF, 0 );
 893     }
 894     if ( aExp == 0 ) {
 895         ++expDiff;
 896     }
 897     else {
 898         aSig |= 0x40000000;
 899     }
 900     shift32RightJamming( aSig, - expDiff, &aSig );
 901     bSig |= 0x40000000;
 902  bBigger:
 903     zSig = bSig - aSig;
 904     zExp = bExp;
 905     zSign ^= 1;
 906     goto normalizeRoundAndPack;
 907  aExpBigger:
 908     if ( aExp == 0xFF ) {
 909         if ( aSig ) return propagateFloat32NaN( a, b );
 910         return a;
 911     }
 912     if ( bExp == 0 ) {
 913         --expDiff;
 914     }
 915     else {
 916         bSig |= 0x40000000;
 917     }
 918     shift32RightJamming( bSig, expDiff, &bSig );
 919     aSig |= 0x40000000;
 920  aBigger:
 921     zSig = aSig - bSig;
 922     zExp = aExp;
 923  normalizeRoundAndPack:
 924     --zExp;
 925     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
 926
 927 }
 928
 929 /*
 930 -------------------------------------------------------------------------------
 931 Returns the result of adding the single-precision floating-point values `a'
 932 and `b'.  The operation is performed according to the IEC/IEEE Standard for
 933 Binary Floating-Point Arithmetic.
 934 -------------------------------------------------------------------------------
 935 */
 936 float32 float32_add( float32 a, float32 b )
 937 {
 938     flag aSign, bSign;
 939
 940     aSign = extractFloat32Sign( a );
 941     bSign = extractFloat32Sign( b );
 942     if ( aSign == bSign ) {
 943         return addFloat32Sigs( a, b, aSign );
 944     }
 945     else {
 946         return subFloat32Sigs( a, b, aSign );
 947     }
 948
 949 }
 950
 951 /*
 952 -------------------------------------------------------------------------------
 953 Returns the result of subtracting the single-precision floating-point values
 954 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
 955 for Binary Floating-Point Arithmetic.
 956 -------------------------------------------------------------------------------
 957 */
 958 float32 float32_sub( float32 a, float32 b )
 959 {
 960     flag aSign, bSign;
 961
 962     aSign = extractFloat32Sign( a );
 963     bSign = extractFloat32Sign( b );
 964     if ( aSign == bSign ) {
 965         return subFloat32Sigs( a, b, aSign );
 966     }
 967     else {
 968         return addFloat32Sigs( a, b, aSign );
 969     }
 970
 971 }
 972
 973 /*
 974 -------------------------------------------------------------------------------
 975 Returns the result of multiplying the single-precision floating-point values
 976 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
 977 for Binary Floating-Point Arithmetic.
 978 -------------------------------------------------------------------------------
 979 */
 980 float32 float32_mul( float32 a, float32 b )
 981 {
 982     flag aSign, bSign, zSign;
 983     int16 aExp, bExp, zExp;
 984     bits32 aSig, bSig, zSig0, zSig1;
 985
 986     aSig = extractFloat32Frac( a );
 987     aExp = extractFloat32Exp( a );
 988     aSign = extractFloat32Sign( a );
 989     bSig = extractFloat32Frac( b );
 990     bExp = extractFloat32Exp( b );
 991     bSign = extractFloat32Sign( b );
 992     zSign = aSign ^ bSign;
 993     if ( aExp == 0xFF ) {
 994         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
 995             return propagateFloat32NaN( a, b );
 996         }
 997         if ( ( bExp | bSig ) == 0 ) {
 998             float_raise( float_flag_invalid );
 999             return float32_default_nan;
1000         }
1001         return packFloat32( zSign, 0xFF, 0 );
1002     }
1003     if ( bExp == 0xFF ) {
1004         if ( bSig ) return propagateFloat32NaN( a, b );
1005         if ( ( aExp | aSig ) == 0 ) {
1006             float_raise( float_flag_invalid );
1007             return float32_default_nan;
1008         }
1009         return packFloat32( zSign, 0xFF, 0 );
1010     }
1011     if ( aExp == 0 ) {
1012         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1013         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1014     }
1015     if ( bExp == 0 ) {
1016         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1017         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1018     }
1019     zExp = aExp + bExp - 0x7F;
1020     aSig = ( aSig | 0x00800000 )<<7;
1021     bSig = ( bSig | 0x00800000 )<<8;
1022     mul32To64( aSig, bSig, &zSig0, &zSig1 );
1023     zSig0 |= ( zSig1 != 0 );
1024     if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
1025         zSig0 <<= 1;
1026         --zExp;
1027     }
1028     return roundAndPackFloat32( zSign, zExp, zSig0 );
1029
1030 }
1031
1032 /*
1033 -------------------------------------------------------------------------------
1034 Returns the result of dividing the single-precision floating-point value `a'
1035 by the corresponding value `b'.  The operation is performed according to the
1036 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1037 -------------------------------------------------------------------------------
1038 */
1039 float32 float32_div( float32 a, float32 b )
1040 {
1041     flag aSign, bSign, zSign;
1042     int16 aExp, bExp, zExp;
1043     bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
1044
1045     aSig = extractFloat32Frac( a );
1046     aExp = extractFloat32Exp( a );
1047     aSign = extractFloat32Sign( a );
1048     bSig = extractFloat32Frac( b );
1049     bExp = extractFloat32Exp( b );
1050     bSign = extractFloat32Sign( b );
1051     zSign = aSign ^ bSign;
1052     if ( aExp == 0xFF ) {
1053         if ( aSig ) return propagateFloat32NaN( a, b );
1054         if ( bExp == 0xFF ) {
1055             if ( bSig ) return propagateFloat32NaN( a, b );
1056             float_raise( float_flag_invalid );
1057             return float32_default_nan;
1058         }
1059         return packFloat32( zSign, 0xFF, 0 );
1060     }
1061     if ( bExp == 0xFF ) {
1062         if ( bSig ) return propagateFloat32NaN( a, b );
1063         return packFloat32( zSign, 0, 0 );
1064     }
1065     if ( bExp == 0 ) {
1066         if ( bSig == 0 ) {
1067             if ( ( aExp | aSig ) == 0 ) {
1068                 float_raise( float_flag_invalid );
1069                 return float32_default_nan;
1070             }
1071             float_raise( float_flag_divbyzero );
1072             return packFloat32( zSign, 0xFF, 0 );
1073         }
1074         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1075     }
1076     if ( aExp == 0 ) {
1077         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1078         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1079     }
1080     zExp = aExp - bExp + 0x7D;
1081     aSig = ( aSig | 0x00800000 )<<7;
1082     bSig = ( bSig | 0x00800000 )<<8;
1083     if ( bSig <= ( aSig + aSig ) ) {
1084         aSig >>= 1;
1085         ++zExp;
1086     }
1087     zSig = estimateDiv64To32( aSig, 0, bSig );
1088     if ( ( zSig & 0x3F ) <= 2 ) {
1089         mul32To64( bSig, zSig, &term0, &term1 );
1090         sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1091         while ( (sbits32) rem0 < 0 ) {
1092             --zSig;
1093             add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
1094         }
1095         zSig |= ( rem1 != 0 );
1096     }
1097     return roundAndPackFloat32( zSign, zExp, zSig );
1098
1099 }
1100
1101 #ifndef SOFTFLOAT_FOR_GCC
1102 /*
1103 -------------------------------------------------------------------------------
1104 Returns the remainder of the single-precision floating-point value `a'
1105 with respect to the corresponding value `b'.  The operation is performed
1106 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1107 -------------------------------------------------------------------------------
1108 */
1109 float32 float32_rem( float32 a, float32 b )
1110 {
1111     flag aSign, bSign, zSign;
1112     int16 aExp, bExp, expDiff;
1113     bits32 aSig, bSig, q, allZero, alternateASig;
1114     sbits32 sigMean;
1115
1116     aSig = extractFloat32Frac( a );
1117     aExp = extractFloat32Exp( a );
1118     aSign = extractFloat32Sign( a );
1119     bSig = extractFloat32Frac( b );
1120     bExp = extractFloat32Exp( b );
1121     bSign = extractFloat32Sign( b );
1122     if ( aExp == 0xFF ) {
1123         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1124             return propagateFloat32NaN( a, b );
1125         }
1126         float_raise( float_flag_invalid );
1127         return float32_default_nan;
1128     }
1129     if ( bExp == 0xFF ) {
1130         if ( bSig ) return propagateFloat32NaN( a, b );
1131         return a;
1132     }
1133     if ( bExp == 0 ) {
1134         if ( bSig == 0 ) {
1135             float_raise( float_flag_invalid );
1136             return float32_default_nan;
1137         }
1138         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1139     }
1140     if ( aExp == 0 ) {
1141         if ( aSig == 0 ) return a;
1142         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1143     }
1144     expDiff = aExp - bExp;
1145     aSig = ( aSig | 0x00800000 )<<8;
1146     bSig = ( bSig | 0x00800000 )<<8;
1147     if ( expDiff < 0 ) {
1148         if ( expDiff < -1 ) return a;
1149         aSig >>= 1;
1150     }
1151     q = ( bSig <= aSig );
1152     if ( q ) aSig -= bSig;
1153     expDiff -= 32;
1154     while ( 0 < expDiff ) {
1155         q = estimateDiv64To32( aSig, 0, bSig );
1156         q = ( 2 < q ) ? q - 2 : 0;
1157         aSig = - ( ( bSig>>2 ) * q );
1158         expDiff -= 30;
1159     }
1160     expDiff += 32;
1161     if ( 0 < expDiff ) {
1162         q = estimateDiv64To32( aSig, 0, bSig );
1163         q = ( 2 < q ) ? q - 2 : 0;
1164         q >>= 32 - expDiff;
1165         bSig >>= 2;
1166         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1167     }
1168     else {
1169         aSig >>= 2;
1170         bSig >>= 2;
1171     }
1172     do {
1173         alternateASig = aSig;
1174         ++q;
1175         aSig -= bSig;
1176     } while ( 0 <= (sbits32) aSig );
1177     sigMean = aSig + alternateASig;
1178     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1179         aSig = alternateASig;
1180     }
1181     zSign = ( (sbits32) aSig < 0 );
1182     if ( zSign ) aSig = - aSig;
1183     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1184
1185 }
1186 #endif
1187
1188 #ifndef SOFTFLOAT_FOR_GCC
1189 /*
1190 -------------------------------------------------------------------------------
1191 Returns the square root of the single-precision floating-point value `a'.
1192 The operation is performed according to the IEC/IEEE Standard for Binary
1193 Floating-Point Arithmetic.
1194 -------------------------------------------------------------------------------
1195 */
1196 float32 float32_sqrt( float32 a )
1197 {
1198     flag aSign;
1199     int16 aExp, zExp;
1200     bits32 aSig, zSig, rem0, rem1, term0, term1;
1201
1202     aSig = extractFloat32Frac( a );
1203     aExp = extractFloat32Exp( a );
1204     aSign = extractFloat32Sign( a );
1205     if ( aExp == 0xFF ) {
1206         if ( aSig ) return propagateFloat32NaN( a, 0 );
1207         if ( ! aSign ) return a;
1208         float_raise( float_flag_invalid );
1209         return float32_default_nan;
1210     }
1211     if ( aSign ) {
1212         if ( ( aExp | aSig ) == 0 ) return a;
1213         float_raise( float_flag_invalid );
1214         return float32_default_nan;
1215     }
1216     if ( aExp == 0 ) {
1217         if ( aSig == 0 ) return 0;
1218         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1219     }
1220     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1221     aSig = ( aSig | 0x00800000 )<<8;
1222     zSig = estimateSqrt32( aExp, aSig ) + 2;
1223     if ( ( zSig & 0x7F ) <= 5 ) {
1224         if ( zSig < 2 ) {
1225             zSig = 0x7FFFFFFF;
1226             goto roundAndPack;
1227         }
1228         else {
1229             aSig >>= aExp & 1;
1230             mul32To64( zSig, zSig, &term0, &term1 );
1231             sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1232             while ( (sbits32) rem0 < 0 ) {
1233                 --zSig;
1234                 shortShift64Left( 0, zSig, 1, &term0, &term1 );
1235                 term1 |= 1;
1236                 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
1237             }
1238             zSig |= ( ( rem0 | rem1 ) != 0 );
1239         }
1240     }
1241     shift32RightJamming( zSig, 1, &zSig );
1242  roundAndPack:
1243     return roundAndPackFloat32( 0, zExp, zSig );
1244
1245 }
1246 #endif
1247
1248 /*
1249 -------------------------------------------------------------------------------
1250 Returns 1 if the single-precision floating-point value `a' is equal to
1251 the corresponding value `b', and 0 otherwise.  The comparison is performed
1252 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1253 -------------------------------------------------------------------------------
1254 */
1255 flag float32_eq( float32 a, float32 b )
1256 {
1257
1258     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1259          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1260        ) {
1261         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1262             float_raise( float_flag_invalid );
1263         }
1264         return 0;
1265     }
1266     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1267
1268 }
1269
1270 /*
1271 -------------------------------------------------------------------------------
1272 Returns 1 if the single-precision floating-point value `a' is less than
1273 or equal to the corresponding value `b', and 0 otherwise.  The comparison
1274 is performed according to the IEC/IEEE Standard for Binary Floating-Point
1275 Arithmetic.
1276 -------------------------------------------------------------------------------
1277 */
1278 flag float32_le( float32 a, float32 b )
1279 {
1280     flag aSign, bSign;
1281
1282     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1283          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1284        ) {
1285         float_raise( float_flag_invalid );
1286         return 0;
1287     }
1288     aSign = extractFloat32Sign( a );
1289     bSign = extractFloat32Sign( b );
1290     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1291     return ( a == b ) || ( aSign ^ ( a < b ) );
1292
1293 }
1294
1295 /*
1296 -------------------------------------------------------------------------------
1297 Returns 1 if the single-precision floating-point value `a' is less than
1298 the corresponding value `b', and 0 otherwise.  The comparison is performed
1299 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1300 -------------------------------------------------------------------------------
1301 */
1302 flag float32_lt( float32 a, float32 b )
1303 {
1304     flag aSign, bSign;
1305
1306     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1307          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1308        ) {
1309         float_raise( float_flag_invalid );
1310         return 0;
1311     }
1312     aSign = extractFloat32Sign( a );
1313     bSign = extractFloat32Sign( b );
1314     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1315     return ( a != b ) && ( aSign ^ ( a < b ) );
1316
1317 }
1318
1319 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1320 /*
1321 -------------------------------------------------------------------------------
1322 Returns 1 if the single-precision floating-point value `a' is equal to
1323 the corresponding value `b', and 0 otherwise.  The invalid exception is
1324 raised if either operand is a NaN.  Otherwise, the comparison is performed
1325 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1326 -------------------------------------------------------------------------------
1327 */
1328 flag float32_eq_signaling( float32 a, float32 b )
1329 {
1330
1331     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1332          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1333        ) {
1334         float_raise( float_flag_invalid );
1335         return 0;
1336     }
1337     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1338
1339 }
1340
1341 /*
1342 -------------------------------------------------------------------------------
1343 Returns 1 if the single-precision floating-point value `a' is less than or
1344 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
1345 cause an exception.  Otherwise, the comparison is performed according to the
1346 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1347 -------------------------------------------------------------------------------
1348 */
1349 flag float32_le_quiet( float32 a, float32 b )
1350 {
1351     flag aSign, bSign;
1352     int16 aExp, bExp;
1353
1354     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1355          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1356        ) {
1357         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1358             float_raise( float_flag_invalid );
1359         }
1360         return 0;
1361     }
1362     aSign = extractFloat32Sign( a );
1363     bSign = extractFloat32Sign( b );
1364     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1365     return ( a == b ) || ( aSign ^ ( a < b ) );
1366
1367 }
1368
1369 /*
1370 -------------------------------------------------------------------------------
1371 Returns 1 if the single-precision floating-point value `a' is less than
1372 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
1373 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
1374 Standard for Binary Floating-Point Arithmetic.
1375 -------------------------------------------------------------------------------
1376 */
1377 flag float32_lt_quiet( float32 a, float32 b )
1378 {
1379     flag aSign, bSign;
1380
1381     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1382          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1383        ) {
1384         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1385             float_raise( float_flag_invalid );
1386         }
1387         return 0;
1388     }
1389     aSign = extractFloat32Sign( a );
1390     bSign = extractFloat32Sign( b );
1391     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1392     return ( a != b ) && ( aSign ^ ( a < b ) );
1393
1394 }
1395 #endif /* !SOFTFLOAT_FOR_GCC */
1396
1397 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1398 /*
1399 -------------------------------------------------------------------------------
1400 Returns the result of converting the double-precision floating-point value
1401 `a' to the 32-bit two's complement integer format.  The conversion is
1402 performed according to the IEC/IEEE Standard for Binary Floating-Point
1403 Arithmetic---which means in particular that the conversion is rounded
1404 according to the current rounding mode.  If `a' is a NaN, the largest
1405 positive integer is returned.  Otherwise, if the conversion overflows, the
1406 largest integer with the same sign as `a' is returned.
1407 -------------------------------------------------------------------------------
1408 */
1409 int32 float64_to_int32( float64 a )
1410 {
1411     flag aSign;
1412     int16 aExp, shiftCount;
1413     bits32 aSig0, aSig1, absZ, aSigExtra;
1414     int32 z;
1415     int8 roundingMode;
1416
1417     aSig1 = extractFloat64Frac1( a );
1418     aSig0 = extractFloat64Frac0( a );
1419     aExp = extractFloat64Exp( a );
1420     aSign = extractFloat64Sign( a );
1421     shiftCount = aExp - 0x413;
1422     if ( 0 <= shiftCount ) {
1423         if ( 0x41E < aExp ) {
1424             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1425             goto invalid;
1426         }
1427         shortShift64Left(
1428             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1429         if ( 0x80000000 < absZ ) goto invalid;
1430     }
1431     else {
1432         aSig1 = ( aSig1 != 0 );
1433         if ( aExp < 0x3FE ) {
1434             aSigExtra = aExp | aSig0 | aSig1;
1435             absZ = 0;
1436         }
1437         else {
1438             aSig0 |= 0x00100000;
1439             aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1440             absZ = aSig0>>( - shiftCount );
1441         }
1442     }
1443     roundingMode = float_rounding_mode;
1444     if ( roundingMode == float_round_nearest_even ) {
1445         if ( (sbits32) aSigExtra < 0 ) {
1446             ++absZ;
1447             if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
1448         }
1449         z = aSign ? - absZ : absZ;
1450     }
1451     else {
1452         aSigExtra = ( aSigExtra != 0 );
1453         if ( aSign ) {
1454             z = - (   absZ
1455                     + ( ( roundingMode == float_round_down ) & aSigExtra ) );
1456         }
1457         else {
1458             z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
1459         }
1460     }
1461     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1462  invalid:
1463         float_raise( float_flag_invalid );
1464         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1465     }
1466     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1467     return z;
1468
1469 }
1470 #endif /* !SOFTFLOAT_FOR_GCC */
1471
1472 /*
1473 -------------------------------------------------------------------------------
1474 Returns the result of converting the double-precision floating-point value
1475 `a' to the 32-bit two's complement integer format.  The conversion is
1476 performed according to the IEC/IEEE Standard for Binary Floating-Point
1477 Arithmetic, except that the conversion is always rounded toward zero.
1478 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1479 the conversion overflows, the largest integer with the same sign as `a' is
1480 returned.
1481 -------------------------------------------------------------------------------
1482 */
1483 int32 float64_to_int32_round_to_zero( float64 a )
1484 {
1485     flag aSign;
1486     int16 aExp, shiftCount;
1487     bits32 aSig0, aSig1, absZ, aSigExtra;
1488     int32 z;
1489
1490     aSig1 = extractFloat64Frac1( a );
1491     aSig0 = extractFloat64Frac0( a );
1492     aExp = extractFloat64Exp( a );
1493     aSign = extractFloat64Sign( a );
1494     shiftCount = aExp - 0x413;
1495     if ( 0 <= shiftCount ) {
1496         if ( 0x41E < aExp ) {
1497             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1498             goto invalid;
1499         }
1500         shortShift64Left(
1501             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1502     }
1503     else {
1504         if ( aExp < 0x3FF ) {
1505             if ( aExp | aSig0 | aSig1 ) {
1506                 float_exception_flags |= float_flag_inexact;
1507             }
1508             return 0;
1509         }
1510         aSig0 |= 0x00100000;
1511         aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1512         absZ = aSig0>>( - shiftCount );
1513     }
1514     z = aSign ? - absZ : absZ;
1515     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1516  invalid:
1517         float_raise( float_flag_invalid );
1518         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1519     }
1520     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1521     return z;
1522
1523 }
1524
1525 /*
1526 -------------------------------------------------------------------------------
1527 Returns the result of converting the double-precision floating-point value
1528 `a' to the single-precision floating-point format.  The conversion is
1529 performed according to the IEC/IEEE Standard for Binary Floating-Point
1530 Arithmetic.
1531 -------------------------------------------------------------------------------
1532 */
1533 float32 float64_to_float32( float64 a )
1534 {
1535     flag aSign;
1536     int16 aExp;
1537     bits32 aSig0, aSig1, zSig;
1538     bits32 allZero;
1539
1540     aSig1 = extractFloat64Frac1( a );
1541     aSig0 = extractFloat64Frac0( a );
1542     aExp = extractFloat64Exp( a );
1543     aSign = extractFloat64Sign( a );
1544     if ( aExp == 0x7FF ) {
1545         if ( aSig0 | aSig1 ) {
1546             return commonNaNToFloat32( float64ToCommonNaN( a ) );
1547         }
1548         return packFloat32( aSign, 0xFF, 0 );
1549     }
1550     shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
1551     if ( aExp ) zSig |= 0x40000000;
1552     return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
1553
1554 }
1555
1556 #ifndef SOFTFLOAT_FOR_GCC
1557 /*
1558 -------------------------------------------------------------------------------
1559 Rounds the double-precision floating-point value `a' to an integer,
1560 and returns the result as a double-precision floating-point value.  The
1561 operation is performed according to the IEC/IEEE Standard for Binary
1562 Floating-Point Arithmetic.
1563 -------------------------------------------------------------------------------
1564 */
1565 float64 float64_round_to_int( float64 a )
1566 {
1567     flag aSign;
1568     int16 aExp;
1569     bits32 lastBitMask, roundBitsMask;
1570     int8 roundingMode;
1571     float64 z;
1572
1573     aExp = extractFloat64Exp( a );
1574     if ( 0x413 <= aExp ) {
1575         if ( 0x433 <= aExp ) {
1576             if (    ( aExp == 0x7FF )
1577                  && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
1578                 return propagateFloat64NaN( a, a );
1579             }
1580             return a;
1581         }
1582         lastBitMask = 1;
1583         lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
1584         roundBitsMask = lastBitMask - 1;
1585         z = a;
1586         roundingMode = float_rounding_mode;
1587         if ( roundingMode == float_round_nearest_even ) {
1588             if ( lastBitMask ) {
1589                 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
1590                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1591             }
1592             else {
1593                 if ( (sbits32) z.low < 0 ) {
1594                     ++z.high;
1595                     if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
1596                 }
1597             }
1598         }
1599         else if ( roundingMode != float_round_to_zero ) {
1600             if (   extractFloat64Sign( z )
1601                  ^ ( roundingMode == float_round_up ) ) {
1602                 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
1603             }
1604         }
1605         z.low &= ~ roundBitsMask;
1606     }
1607     else {
1608         if ( aExp <= 0x3FE ) {
1609             if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
1610             float_exception_flags |= float_flag_inexact;
1611             aSign = extractFloat64Sign( a );
1612             switch ( float_rounding_mode ) {
1613              case float_round_nearest_even:
1614                 if (    ( aExp == 0x3FE )
1615                      && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
1616                    ) {
1617                     return packFloat64( aSign, 0x3FF, 0, 0 );
1618                 }
1619                 break;
1620              case float_round_down:
1621                 return
1622                       aSign ? packFloat64( 1, 0x3FF, 0, 0 )
1623                     : packFloat64( 0, 0, 0, 0 );
1624              case float_round_up:
1625                 return
1626                       aSign ? packFloat64( 1, 0, 0, 0 )
1627                     : packFloat64( 0, 0x3FF, 0, 0 );
1628             }
1629             return packFloat64( aSign, 0, 0, 0 );
1630         }
1631         lastBitMask = 1;
1632         lastBitMask <<= 0x413 - aExp;
1633         roundBitsMask = lastBitMask - 1;
1634         z.low = 0;
1635         z.high = a.high;
1636         roundingMode = float_rounding_mode;
1637         if ( roundingMode == float_round_nearest_even ) {
1638             z.high += lastBitMask>>1;
1639             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
1640                 z.high &= ~ lastBitMask;
1641             }
1642         }
1643         else if ( roundingMode != float_round_to_zero ) {
1644             if (   extractFloat64Sign( z )
1645                  ^ ( roundingMode == float_round_up ) ) {
1646                 z.high |= ( a.low != 0 );
1647                 z.high += roundBitsMask;
1648             }
1649         }
1650         z.high &= ~ roundBitsMask;
1651     }
1652     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
1653         float_exception_flags |= float_flag_inexact;
1654     }
1655     return z;
1656
1657 }
1658 #endif
1659
1660 /*
1661 -------------------------------------------------------------------------------
1662 Returns the result of adding the absolute values of the double-precision
1663 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1664 before being returned.  `zSign' is ignored if the result is a NaN.
1665 The addition is performed according to the IEC/IEEE Standard for Binary
1666 Floating-Point Arithmetic.
1667 -------------------------------------------------------------------------------
1668 */
1669 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1670 {
1671     int16 aExp, bExp, zExp;
1672     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1673     int16 expDiff;
1674
1675     aSig1 = extractFloat64Frac1( a );
1676     aSig0 = extractFloat64Frac0( a );
1677     aExp = extractFloat64Exp( a );
1678     bSig1 = extractFloat64Frac1( b );
1679     bSig0 = extractFloat64Frac0( b );
1680     bExp = extractFloat64Exp( b );
1681     expDiff = aExp - bExp;
1682     if ( 0 < expDiff ) {
1683         if ( aExp == 0x7FF ) {
1684             if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1685             return a;
1686         }
1687         if ( bExp == 0 ) {
1688             --expDiff;
1689         }
1690         else {
1691             bSig0 |= 0x00100000;
1692         }
1693         shift64ExtraRightJamming(
1694             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
1695         zExp = aExp;
1696     }
1697     else if ( expDiff < 0 ) {
1698         if ( bExp == 0x7FF ) {
1699             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1700             return packFloat64( zSign, 0x7FF, 0, 0 );
1701         }
1702         if ( aExp == 0 ) {
1703             ++expDiff;
1704         }
1705         else {
1706             aSig0 |= 0x00100000;
1707         }
1708         shift64ExtraRightJamming(
1709             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
1710         zExp = bExp;
1711     }
1712     else {
1713         if ( aExp == 0x7FF ) {
1714             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1715                 return propagateFloat64NaN( a, b );
1716             }
1717             return a;
1718         }
1719         add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1720         if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
1721         zSig2 = 0;
1722         zSig0 |= 0x00200000;
1723         zExp = aExp;
1724         goto shiftRight1;
1725     }
1726     aSig0 |= 0x00100000;
1727     add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1728     --zExp;
1729     if ( zSig0 < 0x00200000 ) goto roundAndPack;
1730     ++zExp;
1731  shiftRight1:
1732     shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1733  roundAndPack:
1734     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1735
1736 }
1737
1738 /*
1739 -------------------------------------------------------------------------------
1740 Returns the result of subtracting the absolute values of the double-
1741 precision floating-point values `a' and `b'.  If `zSign' is 1, the
1742 difference is negated before being returned.  `zSign' is ignored if the
1743 result is a NaN.  The subtraction is performed according to the IEC/IEEE
1744 Standard for Binary Floating-Point Arithmetic.
1745 -------------------------------------------------------------------------------
1746 */
1747 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1748 {
1749     int16 aExp, bExp, zExp;
1750     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
1751     int16 expDiff;
1752
1753     aSig1 = extractFloat64Frac1( a );
1754     aSig0 = extractFloat64Frac0( a );
1755     aExp = extractFloat64Exp( a );
1756     bSig1 = extractFloat64Frac1( b );
1757     bSig0 = extractFloat64Frac0( b );
1758     bExp = extractFloat64Exp( b );
1759     expDiff = aExp - bExp;
1760     shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
1761     shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
1762     if ( 0 < expDiff ) goto aExpBigger;
1763     if ( expDiff < 0 ) goto bExpBigger;
1764     if ( aExp == 0x7FF ) {
1765         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1766             return propagateFloat64NaN( a, b );
1767         }
1768         float_raise( float_flag_invalid );
1769         return float64_default_nan;
1770     }
1771     if ( aExp == 0 ) {
1772         aExp = 1;
1773         bExp = 1;
1774     }
1775     if ( bSig0 < aSig0 ) goto aBigger;
1776     if ( aSig0 < bSig0 ) goto bBigger;
1777     if ( bSig1 < aSig1 ) goto aBigger;
1778     if ( aSig1 < bSig1 ) goto bBigger;
1779     return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
1780  bExpBigger:
1781     if ( bExp == 0x7FF ) {
1782         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1783         return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
1784     }
1785     if ( aExp == 0 ) {
1786         ++expDiff;
1787     }
1788     else {
1789         aSig0 |= 0x40000000;
1790     }
1791     shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
1792     bSig0 |= 0x40000000;
1793  bBigger:
1794     sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
1795     zExp = bExp;
1796     zSign ^= 1;
1797     goto normalizeRoundAndPack;
1798  aExpBigger:
1799     if ( aExp == 0x7FF ) {
1800         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1801         return a;
1802     }
1803     if ( bExp == 0 ) {
1804         --expDiff;
1805     }
1806     else {
1807         bSig0 |= 0x40000000;
1808     }
1809     shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
1810     aSig0 |= 0x40000000;
1811  aBigger:
1812     sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1813     zExp = aExp;
1814  normalizeRoundAndPack:
1815     --zExp;
1816     return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
1817
1818 }
1819
1820 /*
1821 -------------------------------------------------------------------------------
1822 Returns the result of adding the double-precision floating-point values `a'
1823 and `b'.  The operation is performed according to the IEC/IEEE Standard for
1824 Binary Floating-Point Arithmetic.
1825 -------------------------------------------------------------------------------
1826 */
1827 float64 float64_add( float64 a, float64 b )
1828 {
1829     flag aSign, bSign;
1830
1831     aSign = extractFloat64Sign( a );
1832     bSign = extractFloat64Sign( b );
1833     if ( aSign == bSign ) {
1834         return addFloat64Sigs( a, b, aSign );
1835     }
1836     else {
1837         return subFloat64Sigs( a, b, aSign );
1838     }
1839
1840 }
1841
1842 /*
1843 -------------------------------------------------------------------------------
1844 Returns the result of subtracting the double-precision floating-point values
1845 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1846 for Binary Floating-Point Arithmetic.
1847 -------------------------------------------------------------------------------
1848 */
1849 float64 float64_sub( float64 a, float64 b )
1850 {
1851     flag aSign, bSign;
1852
1853     aSign = extractFloat64Sign( a );
1854     bSign = extractFloat64Sign( b );
1855     if ( aSign == bSign ) {
1856         return subFloat64Sigs( a, b, aSign );
1857     }
1858     else {
1859         return addFloat64Sigs( a, b, aSign );
1860     }
1861
1862 }
1863
1864 /*
1865 -------------------------------------------------------------------------------
1866 Returns the result of multiplying the double-precision floating-point values
1867 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1868 for Binary Floating-Point Arithmetic.
1869 -------------------------------------------------------------------------------
1870 */
1871 float64 float64_mul( float64 a, float64 b )
1872 {
1873     flag aSign, bSign, zSign;
1874     int16 aExp, bExp, zExp;
1875     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
1876
1877     aSig1 = extractFloat64Frac1( a );
1878     aSig0 = extractFloat64Frac0( a );
1879     aExp = extractFloat64Exp( a );
1880     aSign = extractFloat64Sign( a );
1881     bSig1 = extractFloat64Frac1( b );
1882     bSig0 = extractFloat64Frac0( b );
1883     bExp = extractFloat64Exp( b );
1884     bSign = extractFloat64Sign( b );
1885     zSign = aSign ^ bSign;
1886     if ( aExp == 0x7FF ) {
1887         if (    ( aSig0 | aSig1 )
1888              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
1889             return propagateFloat64NaN( a, b );
1890         }
1891         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
1892         return packFloat64( zSign, 0x7FF, 0, 0 );
1893     }
1894     if ( bExp == 0x7FF ) {
1895         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1896         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1897  invalid:
1898             float_raise( float_flag_invalid );
1899             return float64_default_nan;
1900         }
1901         return packFloat64( zSign, 0x7FF, 0, 0 );
1902     }
1903     if ( aExp == 0 ) {
1904         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1905         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1906     }
1907     if ( bExp == 0 ) {
1908         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1909         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1910     }
1911     zExp = aExp + bExp - 0x400;
1912     aSig0 |= 0x00100000;
1913     shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
1914     mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
1915     add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
1916     zSig2 |= ( zSig3 != 0 );
1917     if ( 0x00200000 <= zSig0 ) {
1918         shift64ExtraRightJamming(
1919             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1920         ++zExp;
1921     }
1922     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1923
1924 }
1925
1926 /*
1927 -------------------------------------------------------------------------------
1928 Returns the result of dividing the double-precision floating-point value `a'
1929 by the corresponding value `b'.  The operation is performed according to the
1930 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1931 -------------------------------------------------------------------------------
1932 */
1933 float64 float64_div( float64 a, float64 b )
1934 {
1935     flag aSign, bSign, zSign;
1936     int16 aExp, bExp, zExp;
1937     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1938     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
1939
1940     aSig1 = extractFloat64Frac1( a );
1941     aSig0 = extractFloat64Frac0( a );
1942     aExp = extractFloat64Exp( a );
1943     aSign = extractFloat64Sign( a );
1944     bSig1 = extractFloat64Frac1( b );
1945     bSig0 = extractFloat64Frac0( b );
1946     bExp = extractFloat64Exp( b );
1947     bSign = extractFloat64Sign( b );
1948     zSign = aSign ^ bSign;
1949     if ( aExp == 0x7FF ) {
1950         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1951         if ( bExp == 0x7FF ) {
1952             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1953             goto invalid;
1954         }
1955         return packFloat64( zSign, 0x7FF, 0, 0 );
1956     }
1957     if ( bExp == 0x7FF ) {
1958         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1959         return packFloat64( zSign, 0, 0, 0 );
1960     }
1961     if ( bExp == 0 ) {
1962         if ( ( bSig0 | bSig1 ) == 0 ) {
1963             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1964  invalid:
1965                 float_raise( float_flag_invalid );
1966                 return float64_default_nan;
1967             }
1968             float_raise( float_flag_divbyzero );
1969             return packFloat64( zSign, 0x7FF, 0, 0 );
1970         }
1971         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1972     }
1973     if ( aExp == 0 ) {
1974         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1975         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1976     }
1977     zExp = aExp - bExp + 0x3FD;
1978     shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
1979     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
1980     if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
1981         shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
1982         ++zExp;
1983     }
1984     zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
1985     mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
1986     sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
1987     while ( (sbits32) rem0 < 0 ) {
1988         --zSig0;
1989         add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
1990     }
1991     zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
1992     if ( ( zSig1 & 0x3FF ) <= 4 ) {
1993         mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
1994         sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
1995         while ( (sbits32) rem1 < 0 ) {
1996             --zSig1;
1997             add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
1998         }
1999         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2000     }
2001     shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
2002     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
2003
2004 }
2005
2006 #ifndef SOFTFLOAT_FOR_GCC
2007 /*
2008 -------------------------------------------------------------------------------
2009 Returns the remainder of the double-precision floating-point value `a'
2010 with respect to the corresponding value `b'.  The operation is performed
2011 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2012 -------------------------------------------------------------------------------
2013 */
2014 float64 float64_rem( float64 a, float64 b )
2015 {
2016     flag aSign, bSign, zSign;
2017     int16 aExp, bExp, expDiff;
2018     bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
2019     bits32 allZero, alternateASig0, alternateASig1, sigMean1;
2020     sbits32 sigMean0;
2021     float64 z;
2022
2023     aSig1 = extractFloat64Frac1( a );
2024     aSig0 = extractFloat64Frac0( a );
2025     aExp = extractFloat64Exp( a );
2026     aSign = extractFloat64Sign( a );
2027     bSig1 = extractFloat64Frac1( b );
2028     bSig0 = extractFloat64Frac0( b );
2029     bExp = extractFloat64Exp( b );
2030     bSign = extractFloat64Sign( b );
2031     if ( aExp == 0x7FF ) {
2032         if (    ( aSig0 | aSig1 )
2033              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
2034             return propagateFloat64NaN( a, b );
2035         }
2036         goto invalid;
2037     }
2038     if ( bExp == 0x7FF ) {
2039         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
2040         return a;
2041     }
2042     if ( bExp == 0 ) {
2043         if ( ( bSig0 | bSig1 ) == 0 ) {
2044  invalid:
2045             float_raise( float_flag_invalid );
2046             return float64_default_nan;
2047         }
2048         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2049     }
2050     if ( aExp == 0 ) {
2051         if ( ( aSig0 | aSig1 ) == 0 ) return a;
2052         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2053     }
2054     expDiff = aExp - bExp;
2055     if ( expDiff < -1 ) return a;
2056     shortShift64Left(
2057         aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
2058     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
2059     q = le64( bSig0, bSig1, aSig0, aSig1 );
2060     if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2061     expDiff -= 32;
2062     while ( 0 < expDiff ) {
2063         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2064         q = ( 4 < q ) ? q - 4 : 0;
2065         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2066         shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
2067         shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
2068         sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
2069         expDiff -= 29;
2070     }
2071     if ( -32 < expDiff ) {
2072         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2073         q = ( 4 < q ) ? q - 4 : 0;
2074         q >>= - expDiff;
2075         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2076         expDiff += 24;
2077         if ( expDiff < 0 ) {
2078             shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2079         }
2080         else {
2081             shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
2082         }
2083         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2084         sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
2085     }
2086     else {
2087         shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
2088         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2089     }
2090     do {
2091         alternateASig0 = aSig0;
2092         alternateASig1 = aSig1;
2093         ++q;
2094         sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2095     } while ( 0 <= (sbits32) aSig0 );
2096     add64(
2097         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
2098     if (    ( sigMean0 < 0 )
2099          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
2100         aSig0 = alternateASig0;
2101         aSig1 = alternateASig1;
2102     }
2103     zSign = ( (sbits32) aSig0 < 0 );
2104     if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
2105     return
2106         normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
2107
2108 }
2109 #endif
2110
2111 #ifndef SOFTFLOAT_FOR_GCC
2112 /*
2113 -------------------------------------------------------------------------------
2114 Returns the square root of the double-precision floating-point value `a'.
2115 The operation is performed according to the IEC/IEEE Standard for Binary
2116 Floating-Point Arithmetic.
2117 -------------------------------------------------------------------------------
2118 */
2119 float64 float64_sqrt( float64 a )
2120 {
2121     flag aSign;
2122     int16 aExp, zExp;
2123     bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
2124     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2125     float64 z;
2126
2127     aSig1 = extractFloat64Frac1( a );
2128     aSig0 = extractFloat64Frac0( a );
2129     aExp = extractFloat64Exp( a );
2130     aSign = extractFloat64Sign( a );
2131     if ( aExp == 0x7FF ) {
2132         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
2133         if ( ! aSign ) return a;
2134         goto invalid;
2135     }
2136     if ( aSign ) {
2137         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
2138  invalid:
2139         float_raise( float_flag_invalid );
2140         return float64_default_nan;
2141     }
2142     if ( aExp == 0 ) {
2143         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
2144         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2145     }
2146     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2147     aSig0 |= 0x00100000;
2148     shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
2149     zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
2150     if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
2151     doubleZSig0 = zSig0 + zSig0;
2152     shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
2153     mul32To64( zSig0, zSig0, &term0, &term1 );
2154     sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2155     while ( (sbits32) rem0 < 0 ) {
2156         --zSig0;
2157         doubleZSig0 -= 2;
2158         add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
2159     }
2160     zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
2161     if ( ( zSig1 & 0x1FF ) <= 5 ) {
2162         if ( zSig1 == 0 ) zSig1 = 1;
2163         mul32To64( doubleZSig0, zSig1, &term1, &term2 );
2164         sub64( rem1, 0, term1, term2, &rem1, &rem2 );
2165         mul32To64( zSig1, zSig1, &term2, &term3 );
2166         sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2167         while ( (sbits32) rem1 < 0 ) {
2168             --zSig1;
2169             shortShift64Left( 0, zSig1, 1, &term2, &term3 );
2170             term3 |= 1;
2171             term2 |= doubleZSig0;
2172             add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2173         }
2174         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2175     }
2176     shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
2177     return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
2178
2179 }
2180 #endif
2181
2182 /*
2183 -------------------------------------------------------------------------------
2184 Returns 1 if the double-precision floating-point value `a' is equal to
2185 the corresponding value `b', and 0 otherwise.  The comparison is performed
2186 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2187 -------------------------------------------------------------------------------
2188 */
2189 flag float64_eq( float64 a, float64 b )
2190 {
2191
2192     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2193               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2194          || (    ( extractFloat64Exp( b ) == 0x7FF )
2195               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2196        ) {
2197         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2198             float_raise( float_flag_invalid );
2199         }
2200         return 0;
2201     }
2202     return ( a == b ) ||
2203         ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
2204
2205 }
2206
2207 /*
2208 -------------------------------------------------------------------------------
2209 Returns 1 if the double-precision floating-point value `a' is less than
2210 or equal to the corresponding value `b', and 0 otherwise.  The comparison
2211 is performed according to the IEC/IEEE Standard for Binary Floating-Point
2212 Arithmetic.
2213 -------------------------------------------------------------------------------
2214 */
2215 flag float64_le( float64 a, float64 b )
2216 {
2217     flag aSign, bSign;
2218
2219     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2220               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2221          || (    ( extractFloat64Exp( b ) == 0x7FF )
2222               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2223        ) {
2224         float_raise( float_flag_invalid );
2225         return 0;
2226     }
2227     aSign = extractFloat64Sign( a );
2228     bSign = extractFloat64Sign( b );
2229     if ( aSign != bSign )
2230         return aSign ||
2231             ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
2232               0 );
2233     return ( a == b ) ||
2234         ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2235 }
2236
2237 /*
2238 -------------------------------------------------------------------------------
2239 Returns 1 if the double-precision floating-point value `a' is less than
2240 the corresponding value `b', and 0 otherwise.  The comparison is performed
2241 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2242 -------------------------------------------------------------------------------
2243 */
2244 flag float64_lt( float64 a, float64 b )
2245 {
2246     flag aSign, bSign;
2247
2248     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2249               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2250          || (    ( extractFloat64Exp( b ) == 0x7FF )
2251               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2252        ) {
2253         float_raise( float_flag_invalid );
2254         return 0;
2255     }
2256     aSign = extractFloat64Sign( a );
2257     bSign = extractFloat64Sign( b );
2258     if ( aSign != bSign )
2259         return aSign &&
2260             ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
2261               0 );
2262     return ( a != b ) &&
2263         ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2264
2265 }
2266
2267 #ifndef SOFTFLOAT_FOR_GCC
2268 /*
2269 -------------------------------------------------------------------------------
2270 Returns 1 if the double-precision floating-point value `a' is equal to
2271 the corresponding value `b', and 0 otherwise.  The invalid exception is
2272 raised if either operand is a NaN.  Otherwise, the comparison is performed
2273 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2274 -------------------------------------------------------------------------------
2275 */
2276 flag float64_eq_signaling( float64 a, float64 b )
2277 {
2278
2279     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2280               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2281          || (    ( extractFloat64Exp( b ) == 0x7FF )
2282               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2283        ) {
2284         float_raise( float_flag_invalid );
2285         return 0;
2286     }
2287     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2288
2289 }
2290
2291 /*
2292 -------------------------------------------------------------------------------
2293 Returns 1 if the double-precision floating-point value `a' is less than or
2294 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2295 cause an exception.  Otherwise, the comparison is performed according to the
2296 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2297 -------------------------------------------------------------------------------
2298 */
2299 flag float64_le_quiet( float64 a, float64 b )
2300 {
2301     flag aSign, bSign;
2302
2303     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2304               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2305          || (    ( extractFloat64Exp( b ) == 0x7FF )
2306               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2307        ) {
2308         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2309             float_raise( float_flag_invalid );
2310         }
2311         return 0;
2312     }
2313     aSign = extractFloat64Sign( a );
2314     bSign = extractFloat64Sign( b );
2315     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2316     return ( a == b ) || ( aSign ^ ( a < b ) );
2317
2318 }
2319
2320 /*
2321 -------------------------------------------------------------------------------
2322 Returns 1 if the double-precision floating-point value `a' is less than
2323 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2324 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2325 Standard for Binary Floating-Point Arithmetic.
2326 -------------------------------------------------------------------------------
2327 */
2328 flag float64_lt_quiet( float64 a, float64 b )
2329 {
2330     flag aSign, bSign;
2331
2332     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2333               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2334          || (    ( extractFloat64Exp( b ) == 0x7FF )
2335               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2336        ) {
2337         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2338             float_raise( float_flag_invalid );
2339         }
2340         return 0;
2341     }
2342     aSign = extractFloat64Sign( a );
2343     bSign = extractFloat64Sign( b );
2344     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2345     return ( a != b ) && ( aSign ^ ( a < b ) );
2346
2347 }
2348
2349 #endif