libgcc/config/xtensa/ieee754-sf.S

   1 /* IEEE-754 single-precision functions for Xtensa
   2    Copyright (C) 2006-2013 Free Software Foundation, Inc.
   3    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but WITHOUT
  13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15    License for more details.
  16
  17    Under Section 7 of GPL version 3, you are granted additional
  18    permissions described in the GCC Runtime Library Exception, version
  19    3.1, as published by the Free Software Foundation.
  20
  21    You should have received a copy of the GNU General Public License and
  22    a copy of the GCC Runtime Library Exception along with this program;
  23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  24    <http://www.gnu.org/licenses/>.  */
  25
  26 #ifdef __XTENSA_EB__  /* big-endian: each "h" name is the lower-numbered register of its pair */
  27 #define xh a2
  28 #define xl a3
  29 #define yh a4
  30 #define yl a5
  31 #else                 /* otherwise: each "h" name is the higher-numbered register of its pair */
  32 #define xh a3
  33 #define xl a2
  34 #define yh a5
  35 #define yl a4
  36 #endif                /* __XTENSA_EB__ */
     /* NOTE(review): none of xh/xl/yh/yl is referenced by the code visible
        below.  Presumably they name the high and low words of 64-bit
        values held in the a2/a3 and a4/a5 register pairs -- confirm
        against their users before relying on this.  */
  37
  38 /*  Warning!  The branch displacements for some Xtensa branch instructions
  39     are quite small, and this code has been carefully laid out to keep
  40     branch targets in range.  If you change anything, be sure to check that
  41     the assembler is not relaxing anything to branch over a jump.  */
  42
  43 #ifdef L_negsf2
  44
  45         .align  4
  46         .global __negsf2
  47         .type   __negsf2, @function
     /* __negsf2: negate a single-precision value.
        In:   a2 = x (raw IEEE-754 bit pattern).
        Out:  a2 = x with bit 31 (the sign bit) inverted; every other bit
              is left as it was, whatever x encodes.
        Uses: a4 as scratch; it holds 0x80000000 on return.  */
  48 __negsf2:
  49         leaf_entry sp, 16
             /* Build the sign-bit mask from a small immediate and a shift.  */
  50         movi    a4, 1
             slli    a4, a4, 31
             /* Flip the sign bit.  */
  51         xor     a2, a2, a4
  52         leaf_return
  53
  54 #endif /* L_negsf2 */
  55
  56 #ifdef L_addsubsf3
  57
  58         /* Addition */
  59 __addsf3_aux:
  60
  61         /* Handle NaNs and Infinities.  (This code is placed before the
  62            start of the function just to keep it in range of the limited
  63            branch displacements.)

                On entry: a2 = x, a3 = y, a6 = 0x7f800000 (the exponent-field
                mask), and at least one operand has an all-ones exponent
                field.  Whenever the value returned is a NaN, its quiet bit
                (0x400000) is set, so a signaling NaN operand is never handed
                back unchanged.  Infinities are returned as they are.  */
  64
  65 .Ladd_xnan_or_inf:
  66         /* If y is neither Infinity nor NaN, return x.  */
  67         bnall   a3, a6, .Ladd_return_nan_or_inf
  68         /* If x is a NaN, return it.  Otherwise, return y.  */
  69         slli    a7, a2, 9
  70         bnez    a7, .Ladd_return_nan
  72
  73 .Ladd_ynan_or_inf:
  74         /* Return y.  */
  75         mov     a2, a3

     .Ladd_return_nan_or_inf:
             /* a2 is an Infinity or a NaN.  Shifting out the sign and
                exponent leaves the mantissa; a nonzero mantissa means
                NaN, which must be quieted before it is returned.  */
             slli    a7, a2, 9
             bnez    a7, .Ladd_return_nan
  76         leaf_return

     .Ladd_return_nan:
             /* a6 is no longer needed as the exponent mask on this path.  */
             movi    a6, 0x400000    /* make it a quiet NaN */
             or      a2, a2, a6
             leaf_return
  77
  78 .Ladd_opposite_signs:
  79         /* Operand signs differ.  Do a subtraction.  a6 << 8 is
                0x80000000, so the XOR flips the sign of y.  */
  80         slli    a7, a6, 8
  81         xor     a3, a3, a7
  82         j       .Lsub_same_sign
  83
  84         .align  4
  85         .global __addsf3
  86         .type   __addsf3, @function
     /* __addsf3: add two single-precision values.
        In:   a2 = x, a3 = y (raw IEEE-754 bit patterns).
        Out:  a2 = x + y, rounded to nearest with ties going to even.
        Uses: a6 = 0x7f800000 (exponent-field mask), a7/a8 = sign and
              exponent of x/y, a9 = bits shifted out of the smaller
              operand (kept for rounding), a10 = scratch.
        The code from .Ladd_same_sign onward is also entered from the
        subtraction code (.Lsub_opposite_signs) after it has flipped the
        sign of y.  */
  87 __addsf3:
  88         leaf_entry sp, 16
  89         movi    a6, 0x7f800000          /* a6 = exponent-field mask */
  90
  91         /* Check if the two operands have the same sign.  */
  92         xor     a7, a2, a3
  93         bltz    a7, .Ladd_opposite_signs
  94
  95 .Ladd_same_sign:
  96         /* Check if either exponent field is all ones, i.e., the operand
                is a NaN or an Infinity (a6 is the 0x7f800000 mask).  */
  97         ball    a2, a6, .Ladd_xnan_or_inf
  98         ball    a3, a6, .Ladd_ynan_or_inf
  99
 100         /* Compare the exponents.  The smaller operand will be shifted
 101            right by the exponent difference and added to the larger
 102            one.  The 9-bit fields extracted below include the sign bit,
                which is the same for both operands on this path.  */
 103         extui   a7, a2, 23, 9
 104         extui   a8, a3, 23, 9
 105         bltu    a7, a8, .Ladd_shiftx
 106
 107 .Ladd_shifty:
 108         /* Check if the smaller (or equal) exponent is zero.  */
 109         bnone   a3, a6, .Ladd_yexpzero
 110
 111         /* Replace y's sign/exponent bits with an explicit "1.0": bits
                31..24 are cleared and bit 23 is set, so the top three hex
                digits become 0x008.  */
 112         or      a3, a3, a6
 113         slli    a3, a3, 8
 114         srli    a3, a3, 8
 115
 116 .Ladd_yexpdiff:
 117         /* Compute the exponent difference.  */
 118         sub     a10, a7, a8
 119
 120         /* Exponent difference >= 32 -- just return the bigger value
                (x, which is still untouched in a2).  */
 121         bgeui   a10, 32, 1f
 122
 123         /* Shift y right by the exponent difference.  Any bits that are
 124            shifted out of y are saved in a9 for rounding the result.  */
 125         ssr     a10
 126         movi    a9, 0
 127         src     a9, a3, a9
 128         srl     a3, a3
 129
 130         /* Do the addition.  */
 131         add     a2, a2, a3
 132
 133         /* Check if the add overflowed into the exponent.  */
 134         extui   a10, a2, 23, 9
 135         beq     a10, a7, .Ladd_round
 136         mov     a8, a7                  /* .Ladd_carry expects the original exponent in a8 */
 137         j       .Ladd_carry
 138
 139 .Ladd_yexpzero:
 140         /* y is a subnormal value.  Replace its sign/exponent with zero,
 141            i.e., no implicit "1.0", and increment the apparent exponent
 142            because subnormals behave as if they had the minimum (nonzero)
 143            exponent.  Test for the case when both exponents are zero.  */
 144         slli    a3, a3, 9
 145         srli    a3, a3, 9
 146         bnone   a2, a6, .Ladd_bothexpzero
 147         addi    a8, a8, 1
 148         j       .Ladd_yexpdiff
 149
 150 .Ladd_bothexpzero:
 151         /* Both exponents are zero.  Handle this as a special case.  There
 152            is no need to shift or round, and the normal code for handling
 153            a carry into the exponent field will not work because it
 154            assumes there is an implicit "1.0" that needs to be added.  */
 155         add     a2, a2, a3
 156 1:      leaf_return
 157
 158 .Ladd_xexpzero:
 159         /* Same as "yexpzero" except skip handling the case when both
 160            exponents are zero.  */
 161         slli    a2, a2, 9
 162         srli    a2, a2, 9
 163         addi    a7, a7, 1
 164         j       .Ladd_xexpdiff
 165
 166 .Ladd_shiftx:
 167         /* Same thing as the "shifty" code, but with x and y swapped.  Also,
 168            because the exponent difference is always nonzero in this version,
 169            the shift sequence can use SLL and skip loading a constant zero.  */
 170         bnone   a2, a6, .Ladd_xexpzero
 171
 172         or      a2, a2, a6
 173         slli    a2, a2, 8
 174         srli    a2, a2, 8
 175
 176 .Ladd_xexpdiff:
 177         sub     a10, a8, a7
             /* x has already been modified above, so the bigger value (y)
                must be copied into a2 rather than returned in place.  */
 178         bgeui   a10, 32, .Ladd_returny
 179
 180         ssr     a10
 181         sll     a9, a2                  /* a9 = the bits of x that the SRL below shifts out */
 182         srl     a2, a2
 183
 184         add     a2, a2, a3
 185
 186         /* Check if the add overflowed into the exponent.  */
 187         extui   a10, a2, 23, 9
 188         bne     a10, a8, .Ladd_carry
 189
 190 .Ladd_round:
 191         /* Round up if the leftover fraction is >= 1/2.  The msb of a9 is
                the 1/2 bit, so a non-negative a9 means "less than 1/2".  */
 192         bgez    a9, 1f
 193         addi    a2, a2, 1
 194
 195         /* Check if the leftover fraction is exactly 1/2.  */
 196         slli    a9, a9, 1
 197         beqz    a9, .Ladd_exactlyhalf
 198 1:      leaf_return
 199
 200 .Ladd_returny:
 201         mov     a2, a3
 202         leaf_return
 203
 204 .Ladd_carry:
 205         /* The addition has overflowed into the exponent field, so the
 206            value needs to be renormalized.  The mantissa of the result
 207            can be recovered by subtracting the original exponent and
 208            adding 0x800000 (which is the explicit "1.0" for the
 209            mantissa of the non-shifted operand -- the "1.0" for the
 210            shifted operand was already added).  The mantissa can then
 211            be shifted right by one bit.  The explicit "1.0" of the
 212            shifted mantissa then needs to be replaced by the exponent,
 213            incremented by one to account for the normalizing shift.
 214            It is faster to combine these operations: do the shift first
 215            and combine the additions and subtractions.  If x is the
 216            original exponent, the result is:
 217                shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
 218            or:
 219                shifted mantissa + ((x + 1) << 22)
 220            Note that the exponent is incremented here by leaving the
 221            explicit "1.0" of the mantissa in the exponent field.  */
 222
 223         /* Shift x right by one bit.  Save the lsb.  */
 224         mov     a10, a2
 225         srli    a2, a2, 1
 226
 227         /* See explanation above.  The original exponent is in a8.  */
 228         addi    a8, a8, 1
 229         slli    a8, a8, 22
 230         add     a2, a2, a8
 231
 232         /* Return an Infinity if the exponent overflowed.  */
 233         ball    a2, a6, .Ladd_infinity
 234
 235         /* Same thing as the "round" code except the msb of the leftover
 236            fraction is bit 0 of a10, with the rest of the fraction in a9.  */
 237         bbci.l  a10, 0, 1f
 238         addi    a2, a2, 1
 239         beqz    a9, .Ladd_exactlyhalf
 240 1:      leaf_return
 241
 242 .Ladd_infinity:
 243         /* Clear the mantissa.  */
 244         srli    a2, a2, 23
 245         slli    a2, a2, 23
 246
 247         /* The sign bit may have been lost in a carry-out.  Put it back.  */
 248         slli    a8, a8, 1
 249         or      a2, a2, a8
 250         leaf_return
 251
 252 .Ladd_exactlyhalf:
 253         /* Round down to the nearest even value.  (The result was already
                incremented, so clearing bit 0 yields the even neighbour.)  */
 254         srli    a2, a2, 1
 255         slli    a2, a2, 1
 256         leaf_return
 257
 258
 259         /* Subtraction */
 260 __subsf3_aux:
 261
 262         /* Handle NaNs and Infinities.  (This code is placed before the
 263            start of the function just to keep it in range of the limited
 264            branch displacements.)

                On entry: a2 = x, a3 = y, a6 = 0x7f800000 (the exponent-field
                mask), and at least one operand has an all-ones exponent
                field.  Whenever the value returned is a NaN, its quiet bit
                (0x400000) is set, so a signaling NaN operand is never handed
                back unchanged.  Infinities are returned as they are.  */
 265
 266 .Lsub_xnan_or_inf:
 267         /* If y is neither Infinity nor NaN, return x.  */
 268         bnall   a3, a6, .Lsub_return_nan_or_inf
 269         /* Both x and y are either NaN or Inf, so the result is NaN.  */

     .Lsub_return_nan:
 270         movi    a4, 0x400000    /* make it a quiet NaN */
 271         or      a2, a2, a4
 272         leaf_return
 273
 274 .Lsub_ynan_or_inf:
 275         /* Negate y and return it.  a6 << 8 is 0x80000000, the sign bit.  */
 276         slli    a7, a6, 8
 277         xor     a2, a3, a7

     .Lsub_return_nan_or_inf:
             /* a2 is an Infinity or a NaN.  Shifting out the sign and
                exponent leaves the mantissa; a nonzero mantissa means
                NaN, which must be quieted before it is returned.  */
             slli    a7, a2, 9
             bnez    a7, .Lsub_return_nan
 278         leaf_return
 279
 280 .Lsub_opposite_signs:
 281         /* Operand signs differ.  Do an addition.  The XOR flips the
                sign of y.  */
 282         slli    a7, a6, 8
 283         xor     a3, a3, a7
 284         j       .Ladd_same_sign
 285
 286         .align  4
 287         .global __subsf3
 288         .type   __subsf3, @function
     /* __subsf3: subtract two single-precision values.
        In:   a2 = x, a3 = y (raw IEEE-754 bit patterns).
        Out:  a2 = x - y, rounded to nearest with ties going to even.
        Uses: a6 = 0x7f800000 (exponent-field mask; reused as the
              normalization shift count from .Lsub_borrow onward),
              a7/a8 = exponent fields of x/y, a9 = low-order bits kept
              for rounding, a10/a11 = scratch.
        The code from .Lsub_same_sign onward is also entered from the
        addition code (.Ladd_opposite_signs) after it has flipped the
        sign of y.  */
 289 __subsf3:
 290         leaf_entry sp, 16
 291         movi    a6, 0x7f800000          /* a6 = exponent-field mask */
 292
 293         /* Check if the two operands have the same sign.  */
 294         xor     a7, a2, a3
 295         bltz    a7, .Lsub_opposite_signs
 296
 297 .Lsub_same_sign:
 298         /* Check if either exponent field is all ones, i.e., the operand
                is a NaN or an Infinity (a6 is the 0x7f800000 mask).  */
 299         ball    a2, a6, .Lsub_xnan_or_inf
 300         ball    a3, a6, .Lsub_ynan_or_inf
 301
 302         /* Compare the operands.  In contrast to addition, the entire
 303            value matters here.  The signs are equal on this path, so the
                unsigned comparison of the raw words orders the magnitudes.  */
 304         extui   a7, a2, 23, 8
 305         extui   a8, a3, 23, 8
 306         bltu    a2, a3, .Lsub_xsmaller
 307
 308 .Lsub_ysmaller:
 309         /* Check if the smaller (or equal) exponent is zero.  */
 310         bnone   a3, a6, .Lsub_yexpzero
 311
 312         /* Replace y's sign/exponent bits with an explicit "1.0": bits
                31..24 are cleared and bit 23 is set, so the top three hex
                digits become 0x008.  */
 313         or      a3, a3, a6
 314         slli    a3, a3, 8
 315         srli    a3, a3, 8
 316
 317 .Lsub_yexpdiff:
 318         /* Compute the exponent difference.  */
 319         sub     a10, a7, a8
 320
 321         /* Exponent difference >= 32 -- just return the bigger value
                (x, which is still untouched in a2).  */
 322         bgeui   a10, 32, 1f
 323
 324         /* Shift y right by the exponent difference.  Any bits that are
 325            shifted out of y are saved in a9 for rounding the result.  */
 326         ssr     a10
 327         movi    a9, 0
 328         src     a9, a3, a9
 329         srl     a3, a3
 330
 331         sub     a2, a2, a3
 332
 333         /* Subtract the leftover bits in a9 from zero and propagate any
 334            borrow from a2.  (A borrow is needed exactly when a9 is
                nonzero, hence the conditional move.)  */
 335         neg     a9, a9
 336         addi    a10, a2, -1
 337         movnez  a2, a10, a9
 338
 339         /* Check if the subtract underflowed into the exponent.  */
 340         extui   a10, a2, 23, 8
 341         beq     a10, a7, .Lsub_round
 342         j       .Lsub_borrow
 343
 344 .Lsub_yexpzero:
 345         /* Return zero if the inputs are equal.  (For the non-subnormal
 346            case, subtracting the "1.0" will cause a borrow from the exponent
 347            and this case can be detected when handling the borrow.)  */
 348         beq     a2, a3, .Lsub_return_zero
 349
 350         /* y is a subnormal value.  Replace its sign/exponent with zero,
 351            i.e., no implicit "1.0".  Unless x is also a subnormal, increment
 352            y's apparent exponent because subnormals behave as if they had
 353            the minimum (nonzero) exponent.  */
 354         slli    a3, a3, 9
 355         srli    a3, a3, 9
 356         bnone   a2, a6, .Lsub_yexpdiff
 357         addi    a8, a8, 1
 358         j       .Lsub_yexpdiff
 359
 360 .Lsub_returny:
 361         /* Negate and return y.  a6 << 8 is 0x80000000, the sign bit.  */
 362         slli    a7, a6, 8
 363         xor     a2, a3, a7
 364 1:      leaf_return
 365
 366 .Lsub_xsmaller:
 367         /* Same thing as the "ysmaller" code, but with x and y swapped and
 368            with y negated.  */
 369         bnone   a2, a6, .Lsub_xexpzero
 370
 371         or      a2, a2, a6
 372         slli    a2, a2, 8
 373         srli    a2, a2, 8
 374
 375 .Lsub_xexpdiff:
 376         sub     a10, a8, a7
             /* x has already been modified above, so -y is rebuilt from a3
                rather than returning a2 in place.  */
 377         bgeui   a10, 32, .Lsub_returny
 378
 379         ssr     a10
 380         movi    a9, 0
 381         src     a9, a2, a9
 382         srl     a2, a2
 383
 384         /* Negate y.  */
 385         slli    a11, a6, 8
 386         xor     a3, a3, a11
 387
 388         sub     a2, a3, a2
 389
             /* Same borrow propagation as on the "ysmaller" path.  */
 390         neg     a9, a9
 391         addi    a10, a2, -1
 392         movnez  a2, a10, a9
 393
 394         /* Check if the subtract underflowed into the exponent.  */
 395         extui   a10, a2, 23, 8
 396         bne     a10, a8, .Lsub_borrow
 397
 398 .Lsub_round:
 399         /* Round up if the leftover fraction is >= 1/2.  The msb of a9 is
                the 1/2 bit, so a non-negative a9 means "less than 1/2".  */
 400         bgez    a9, 1f
 401         addi    a2, a2, 1
 402
 403         /* Check if the leftover fraction is exactly 1/2.  */
 404         slli    a9, a9, 1
 405         beqz    a9, .Lsub_exactlyhalf
 406 1:      leaf_return
 407
 408 .Lsub_xexpzero:
 409         /* Same as "yexpzero".  */
 410         beq     a2, a3, .Lsub_return_zero
 411         slli    a2, a2, 9
 412         srli    a2, a2, 9
 413         bnone   a3, a6, .Lsub_xexpdiff
 414         addi    a7, a7, 1
 415         j       .Lsub_xexpdiff
 416
 417 .Lsub_return_zero:
 418         movi    a2, 0
 419         leaf_return
 420
 421 .Lsub_borrow:
 422         /* The subtraction has underflowed into the exponent field, so the
 423            value needs to be renormalized.  Shift the mantissa left as
 424            needed to remove any leading zeros and adjust the exponent
 425            accordingly.  If the exponent is not large enough to remove
 426            all the leading zeros, the result will be a subnormal value.
                On entry a10 holds the exponent field extracted from a2
                after the subtraction.  */
 427
 428         slli    a8, a2, 9               /* a8 = mantissa bits, left-justified */
 429         beqz    a8, .Lsub_xzero
             /* do_nsau is a macro defined outside this view; presumably it
                sets a6 to the number of leading zero bits in a8, using a7
                and a11 as temporaries -- confirm against its definition.  */
 430         do_nsau a6, a8, a7, a11
 431         srli    a8, a8, 9
 432         bge     a6, a10, .Lsub_subnormal
 433         addi    a6, a6, 1
 434
 435 .Lsub_normalize_shift:
 436         /* Shift the mantissa (a8/a9) left by a6.  */
 437         ssl     a6
 438         src     a8, a8, a9
 439         sll     a9, a9
 440
 441         /* Combine the shifted mantissa with the sign and exponent,
 442            decrementing the exponent by a6.  (The exponent has already
 443            been decremented by one due to the borrow from the subtraction,
 444            but adding the mantissa will increment the exponent by one.)  */
 445         srli    a2, a2, 23
 446         sub     a2, a2, a6
 447         slli    a2, a2, 23
 448         add     a2, a2, a8
 449         j       .Lsub_round
 450
 451 .Lsub_exactlyhalf:
 452         /* Round down to the nearest even value.  (The result was already
                incremented, so clearing bit 0 yields the even neighbour.)  */
 453         srli    a2, a2, 1
 454         slli    a2, a2, 1
 455         leaf_return
 456
 457 .Lsub_xzero:
 458         /* If there was a borrow from the exponent, and the mantissa and
 459            guard digits are all zero, then the inputs were equal and the
 460            result should be zero.  */
 461         beqz    a9, .Lsub_return_zero
 462
 463         /* Only the guard digit is nonzero.  Shift by min(24, a10).  */
 464         addi    a11, a10, -24
 465         movi    a6, 24
 466         movltz  a6, a10, a11            /* a6 = a10 when a10 < 24 */
 467         j       .Lsub_normalize_shift
 468
 469 .Lsub_subnormal:
 470         /* The exponent is too small to shift away all the leading zeros.
 471            Set a6 to the current exponent (which has already been
 472            decremented by the borrow) so that the exponent of the result
 473            will be zero.  Do not add 1 to a6 in this case, because: (1)
 474            adding the mantissa will not increment the exponent, so there is
 475            no need to subtract anything extra from the exponent to
 476            compensate, and (2) the effective exponent of a subnormal is 1
 477            not 0 so the shift amount must be 1 smaller than normal. */
 478         mov     a6, a10
 479         j       .Lsub_normalize_shift
 480
 481 #endif /* L_addsubsf3 */
 482
 483 #ifdef L_mulsf3
 484
 485         /* Multiplication */
     /* XCHAL_NO_MUL is defined when none of the MUL16, MUL32 or MAC16
        options is configured; the 16x16 partial products are then computed
        by the software helper .Lmul_mulsi3.  */
 486 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
 487 #define XCHAL_NO_MUL 1
 488 #endif
 489
 490 __mulsf3_aux:
 491
 492         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
 493            (This code is placed before the start of the function just to
 494            keep it in range of the limited branch displacements.)
                These handlers are entered from __mulsf3 with a2 = x, a3 = y,
                a7 = x ^ y (bit 31 is the sign of the result) and a8/a9 = the
                exponent fields of x/y.  */
 495
 496 .Lmul_xexpzero:
 497         /* Clear the sign bit of x.  */
 498         slli    a2, a2, 1
 499         srli    a2, a2, 1
 500
 501         /* If x is zero, return zero.  */
 502         beqz    a2, .Lmul_return_zero
 503
 504         /* Normalize x.  Adjust the exponent in a8.
                do_nsau is a macro defined outside this view; presumably it
                sets a10 to the number of leading zero bits in a2, using a11
                and a12 as temporaries -- confirm against its definition.
                Shifting left by that count minus 8 would then put the
                leading one at bit 23, and a8 becomes 1 minus the shift.  */
 505         do_nsau a10, a2, a11, a12
 506         addi    a10, a10, -8
 507         ssl     a10
 508         sll     a2, a2
 509         movi    a8, 1
 510         sub     a8, a8, a10
 511         j       .Lmul_xnormalized
 512
 513 .Lmul_yexpzero:
 514         /* Clear the sign bit of y.  */
 515         slli    a3, a3, 1
 516         srli    a3, a3, 1
 517
 518         /* If y is zero, return zero.  */
 519         beqz    a3, .Lmul_return_zero
 520
 521         /* Normalize y.  Adjust the exponent in a9.  (Same sequence as
                for x above, with a9 becoming 1 minus the shift count.)  */
 522         do_nsau a10, a3, a11, a12
 523         addi    a10, a10, -8
 524         ssl     a10
 525         sll     a3, a3
 526         movi    a9, 1
 527         sub     a9, a9, a10
 528         j       .Lmul_ynormalized
 529
 530 .Lmul_return_zero:
 531         /* Return zero with the appropriate sign bit.  Only bit 31 of
                a7 (the XOR of the operand signs) is kept.  */
 532         srli    a2, a7, 31
 533         slli    a2, a2, 31
 534         j       .Lmul_done
 535
 536 .Lmul_xnan_or_inf:
 537         /* If y is zero, return NaN.  */
 538         slli    a8, a3, 1
 539         bnez    a8, 1f
 540         movi    a4, 0x400000    /* make it a quiet NaN */
 541         or      a2, a2, a4
 542         j       .Lmul_done
 543 1:
 544         /* If y is NaN, return y.  */
 545         bnall   a3, a6, .Lmul_returnx
 546         slli    a8, a3, 9
 547         beqz    a8, .Lmul_returnx
 548
 549 .Lmul_returny:
 550         mov     a2, a3
 551
 552 .Lmul_returnx:
 553         /* Set the sign bit and return.  */
 554         extui   a7, a7, 31, 1
 555         slli    a2, a2, 1
 556         ssai    1
 557         src     a2, a7, a2
 558         j       .Lmul_done
 559
 560 .Lmul_ynan_or_inf:
 561         /* If x is zero, return NaN.  */
 562         slli    a8, a2, 1
 563         bnez    a8, .Lmul_returny
 564         movi    a7, 0x400000    /* make it a quiet NaN */
 565         or      a2, a3, a7
 566         j       .Lmul_done
 567
 568         .align  4
 569         .global __mulsf3
 570         .type   __mulsf3, @function
     /* __mulsf3: multiply two single-precision values.
        In:   a2 = x, a3 = y (raw IEEE-754 bit patterns).
        Out:  a2 = x * y, rounded to nearest with ties going to even.
        Uses: a6 = 0x7f800000 (exponent-field mask), later the low word of
              the product; a7 = x ^ y (bit 31 is the sign of the result);
              a8/a9 = exponents of x/y, then a8 = the result exponent;
              a4, a5, a10, a11 = scratch.
        With the CALL0 ABI, a12-a15 are saved in a 32-byte stack frame on
        entry and restored at .Lmul_done, so every exit path leaves
        through .Lmul_done.  */
 571 __mulsf3:
 572 #if __XTENSA_CALL0_ABI__
 573         leaf_entry sp, 32
 574         addi    sp, sp, -32
 575         s32i    a12, sp, 16             /* frame: a12-a15 at sp+16..sp+28 */
 576         s32i    a13, sp, 20
 577         s32i    a14, sp, 24
 578         s32i    a15, sp, 28
 579 #elif XCHAL_NO_MUL
 580         /* This is not really a leaf function; allocate enough stack space
 581            to allow CALL12s to a helper function.  */
 582         leaf_entry sp, 64
 583 #else
 584         leaf_entry sp, 32
 585 #endif
 586         movi    a6, 0x7f800000          /* a6 = exponent-field mask */
 587
 588         /* Get the sign of the result.  */
 589         xor     a7, a2, a3
 590
 591         /* Check for NaN and infinity.  */
 592         ball    a2, a6, .Lmul_xnan_or_inf
 593         ball    a3, a6, .Lmul_ynan_or_inf
 594
 595         /* Extract the exponents.  */
 596         extui   a8, a2, 23, 8
 597         extui   a9, a3, 23, 8
 598
             /* A zero exponent field means zero or subnormal; the handlers
                either return zero or normalize the operand and come back
                to the label that follows the branch.  */
 599         beqz    a8, .Lmul_xexpzero
 600 .Lmul_xnormalized:
 601         beqz    a9, .Lmul_yexpzero
 602 .Lmul_ynormalized:
 603
 604         /* Add the exponents.  */
 605         add     a8, a8, a9
 606
 607         /* Replace sign/exponent fields with explicit "1.0".  */
 608         movi    a10, 0xffffff
 609         or      a2, a2, a6
 610         and     a2, a2, a10
 611         or      a3, a3, a6
 612         and     a3, a3, a10
 613
 614         /* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */
 615
 616 #if XCHAL_HAVE_MUL32_HIGH
 617
 618         mull    a6, a2, a3              /* low word */
 619         muluh   a2, a2, a3              /* high word */
 620
 621 #else
 622
 623         /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
 624            products.  These partial products are:
 625
 626                 0 xl * yl
 627
 628                 1 xl * yh
 629                 2 xh * yl
 630
 631                 3 xh * yh
 632
 633            If using the Mul16 or Mul32 multiplier options, these input
 634            chunks must be stored in separate registers.  For Mac16, the
 635            UMUL.AA.* opcodes can specify that the inputs come from either
 636            half of the registers, so there is no need to shift them out
 637            ahead of time.  If there is no multiply hardware, the 16-bit
 638            chunks can be extracted when setting up the arguments to the
 639            separate multiply function.  */
 640
 641 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
 642         /* Calling a separate multiply function will clobber a0 and requires
 643            use of a8 as a temporary, so save those values now.  (The function
 644            uses a custom ABI so nothing else needs to be saved.)  */
 645         s32i    a0, sp, 0
 646         s32i    a8, sp, 4
 647 #endif
 648
 649 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
 650
 651 #define a2h a4
 652 #define a3h a5
 653
 654         /* Get the high halves of the inputs into registers.  */
 655         srli    a2h, a2, 16
 656         srli    a3h, a3, 16
 657
 658 #define a2l a2
 659 #define a3l a3
 660
 661 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
 662         /* Clear the high halves of the inputs.  This does not matter
 663            for MUL16 because the high bits are ignored.  */
 664         extui   a2, a2, 0, 16
 665         extui   a3, a3, 0, 16
 666 #endif
 667 #endif /* MUL16 || MUL32 */
 668
 669
     /* do_mul(dst, xreg, xhalf, yreg, yhalf) puts the product of the chosen
        16-bit half ("l" or "h") of xreg and of yreg into dst.  One of four
        definitions is selected by the configured multiply option.  */
 670 #if XCHAL_HAVE_MUL16
 671
 672 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 673         mul16u  dst, xreg ## xhalf, yreg ## yhalf
 674
 675 #elif XCHAL_HAVE_MUL32
 676
 677 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 678         mull    dst, xreg ## xhalf, yreg ## yhalf
 679
 680 #elif XCHAL_HAVE_MAC16
 681
 682 /* The preprocessor insists on inserting a space when concatenating after
 683    a period in the definition of do_mul below.  These macros are a workaround
 684    using underscores instead of periods when doing the concatenation.  */
 685 #define umul_aa_ll umul.aa.ll
 686 #define umul_aa_lh umul.aa.lh
 687 #define umul_aa_hl umul.aa.hl
 688 #define umul_aa_hh umul.aa.hh
 689
 690 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 691         umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
 692         rsr     dst, ACCLO
 693
 694 #else /* no multiply hardware */
 695
 696 #define set_arg_l(dst, src) \
 697         extui   dst, src, 0, 16
 698 #define set_arg_h(dst, src) \
 699         srli    dst, src, 16
 700
     /* With CALL0 the helper takes its inputs in a13/a14 and returns the
        product in a12.  Otherwise CALL12 rotates the register window, so
        the caller's a14/a15 are the helper's a2/a3 and the helper's result
        in a2 shows up in the caller's a14.  */
 701 #if __XTENSA_CALL0_ABI__
 702 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 703         set_arg_ ## xhalf (a13, xreg); \
 704         set_arg_ ## yhalf (a14, yreg); \
 705         call0   .Lmul_mulsi3; \
 706         mov     dst, a12
 707 #else
 708 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 709         set_arg_ ## xhalf (a14, xreg); \
 710         set_arg_ ## yhalf (a15, yreg); \
 711         call12  .Lmul_mulsi3; \
 712         mov     dst, a14
 713 #endif /* __XTENSA_CALL0_ABI__ */
 714
 715 #endif /* no multiply hardware */
 716
 717         /* Add pp1 and pp2 into a6 with carry-out in a9.  */
 718         do_mul(a6, a2, l, a3, h)        /* pp 1 */
 719         do_mul(a11, a2, h, a3, l)       /* pp 2 */
 720         movi    a9, 0
 721         add     a6, a6, a11
 722         bgeu    a6, a11, 1f             /* sum >= addend: no carry out of the add */
 723         addi    a9, a9, 1
 724 1:
 725         /* Shift the high half of a9/a6 into position in a9.  Note that
 726            this value can be safely incremented without any carry-outs.  */
 727         ssai    16
 728         src     a9, a9, a6
 729
 730         /* Compute the low word into a6.  */
 731         do_mul(a11, a2, l, a3, l)       /* pp 0 */
 732         sll     a6, a6
 733         add     a6, a6, a11
 734         bgeu    a6, a11, 1f             /* sum >= addend: no carry into the high word */
 735         addi    a9, a9, 1
 736 1:
 737         /* Compute the high word into a2.  */
 738         do_mul(a2, a2, h, a3, h)        /* pp 3 */
 739         add     a2, a2, a9
 740
 741 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
 742         /* Restore values saved on the stack during the multiplication.  */
 743         l32i    a0, sp, 0
 744         l32i    a8, sp, 4
 745 #endif
 746 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
 747
 748         /* Shift left by 9 bits, unless there was a carry-out from the
 749            multiply, in which case, shift by 8 bits and increment the
 750            exponent.  (a5 is nonzero when any of bits 15 and up of the
                high word is set.)  */
 751         movi    a4, 9
 752         srli    a5, a2, 24 - 9
 753         beqz    a5, 1f
 754         addi    a4, a4, -1
 755         addi    a8, a8, 1
 756 1:      ssl     a4
 757         src     a2, a2, a6
 758         sll     a6, a6
 759
 760         /* Subtract the extra bias from the exponent sum (plus one to account
 761            for the explicit "1.0" of the mantissa that will be added to the
 762            exponent in the final result).  */
 763         movi    a4, 0x80
 764         sub     a8, a8, a4
 765
 766         /* Check for over/underflow.  The value in a8 is one less than the
 767            final exponent, so values in the range 0..fd are OK here.  The
                unsigned comparison also catches negative values of a8.  */
 768         movi    a4, 0xfe
 769         bgeu    a8, a4, .Lmul_overflow
 770
 771 .Lmul_round:
 772         /* Round.  The msb of a6 is the 1/2 bit: round up when it is set,
                and go to the tie case when no lower bit is set.  */
 773         bgez    a6, .Lmul_rounded
 774         addi    a2, a2, 1
 775         slli    a6, a6, 1
 776         beqz    a6, .Lmul_exactlyhalf
 777
 778 .Lmul_rounded:
 779         /* Add the exponent to the mantissa.  */
 780         slli    a8, a8, 23
 781         add     a2, a2, a8
 782
 783 .Lmul_addsign:
 784         /* Add the sign bit.  */
 785         srli    a7, a7, 31
 786         slli    a7, a7, 31
 787         or      a2, a2, a7
 788
 789 .Lmul_done:
             /* Common exit: undo the CALL0 frame set up on entry.  */
 790 #if __XTENSA_CALL0_ABI__
 791         l32i    a12, sp, 16
 792         l32i    a13, sp, 20
 793         l32i    a14, sp, 24
 794         l32i    a15, sp, 28
 795         addi    sp, sp, 32
 796 #endif
 797         leaf_return
 798
 799 .Lmul_exactlyhalf:
 800         /* Round down to the nearest even value.  (The result was already
                incremented, so clearing bit 0 yields the even neighbour.)  */
 801         srli    a2, a2, 1
 802         slli    a2, a2, 1
 803         j       .Lmul_rounded
 804
 805 .Lmul_overflow:
             /* a8 is outside 0..fd: negative means underflow, anything else
                is an overflow.  */
 806         bltz    a8, .Lmul_underflow
 807         /* Return +/- Infinity.  */
 808         movi    a8, 0xff
 809         slli    a2, a8, 23
 810         j       .Lmul_addsign
 811
 812 .Lmul_underflow:
 813         /* Create a subnormal value, where the exponent field contains zero,
 814            but the effective exponent is 1.  The value of a8 is one less than
 815            the actual exponent, so just negate it to get the shift amount.  */
 816         neg     a8, a8
 817         mov     a9, a6                  /* keep the old rounding bits for the sticky test below */
 818         ssr     a8
 819         bgeui   a8, 32, .Lmul_flush_to_zero
 820
 821         /* Shift a2 right.  Any bits that are shifted out of a2 are saved
 822            in a6 (combined with the shifted-out bits currently in a6) for
 823            rounding the result.  */
 824         sll     a6, a2
 825         srl     a2, a2
 826
 827         /* Set the exponent to zero.  */
 828         movi    a8, 0
 829
 830         /* Pack any nonzero bits shifted out into a6.  */
 831         beqz    a9, .Lmul_round
 832         movi    a9, 1
 833         or      a6, a6, a9
 834         j       .Lmul_round
 835
 836 .Lmul_flush_to_zero:
 837         /* Return zero with the appropriate sign bit.  */
 838         srli    a2, a7, 31
 839         slli    a2, a2, 31
 840         j       .Lmul_done
 841
 842 #if XCHAL_NO_MUL
 843
 844         /* For Xtensa processors with no multiply hardware, this simplified
 845            version of _mulsi3 is used for multiplying 16-bit chunks of
 846            the floating-point mantissas.  When using CALL0, this function
 847            uses a custom ABI: the inputs are passed in a13 and a14, the
 848            result is returned in a12, and a8 and a15 are clobbered.
                Otherwise it is reached with CALL12 and uses the ordinary
                a2/a3 arguments and a2 result of its own register window.  */
 849         .align  4
 850 .Lmul_mulsi3:
 851         leaf_entry sp, 16
             /* mul_mulsi3_body: shift-and-add multiply, \dst = \src1 * \src2.
                Each pass of the loop consumes the low four bits of \src1:
                for every set bit the correspondingly scaled \src2 is added
                to \dst.  \src1 and \src2 are destroyed; \tmp1 and \tmp2
                are scratch.  do_addx2/4/8 are macros defined outside this
                view; by analogy with the ADDXn instructions they presumably
                compute (\src2 << k) + \dst into \tmp1 -- confirm against
                their definitions.  */
 852         .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
 853         movi    \dst, 0
 854 1:      add     \tmp1, \src2, \dst
 855         extui   \tmp2, \src1, 0, 1
 856         movnez  \dst, \tmp1, \tmp2
 857
 858         do_addx2 \tmp1, \src2, \dst, \tmp1
 859         extui   \tmp2, \src1, 1, 1
 860         movnez  \dst, \tmp1, \tmp2
 861
 862         do_addx4 \tmp1, \src2, \dst, \tmp1
 863         extui   \tmp2, \src1, 2, 1
 864         movnez  \dst, \tmp1, \tmp2
 865
 866         do_addx8 \tmp1, \src2, \dst, \tmp1
 867         extui   \tmp2, \src1, 3, 1
 868         movnez  \dst, \tmp1, \tmp2
 869
             /* Move on to the next four bits; stop once \src1 is used up.  */
 870         srli    \src1, \src1, 4
 871         slli    \src2, \src2, 4
 872         bnez    \src1, 1b
 873         .endm
 874 #if __XTENSA_CALL0_ABI__
 875         mul_mulsi3_body a12, a13, a14, a15, a8
 876 #else
 877         /* The result will be written into a2, so save that argument in a4.  */
 878         mov     a4, a2
 879         mul_mulsi3_body a2, a4, a3, a5, a6
 880 #endif
 881         leaf_return
 882 #endif /* XCHAL_NO_MUL */
 883 #endif /* L_mulsf3 */
 884
 885 #ifdef L_divsf3
 886
 887         /* Division */
 888 __divsf3_aux:
 889
 890         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
 891            (This code is placed before the start of the function just to
 892            keep it in range of the limited branch displacements.)  */
             /* Register state set up by __divsf3 before it branches here:
                  a2 = x, a3 = y, a6 = 0x7f800000 (exponent field mask),
                  a7 = x ^ y (bit 31 is the sign of the result).
                The two exponent-zero handlers are additionally entered with
                a8 = exponent field of x and a9 = exponent field of y.  */
 893
 894 .Ldiv_yexpzero:
             /* The exponent field of y is zero: y is zero or subnormal.  */
 895         /* Clear the sign bit of y.  */
 896         slli    a3, a3, 1
 897         srli    a3, a3, 1
 898
 899         /* Check for division by zero.  */
 900         beqz    a3, .Ldiv_yzero
 901
 902         /* Normalize y.  Adjust the exponent in a9.  */
             /* NOTE(review): do_nsau is defined elsewhere; presumably it puts
                the number of leading zero bits of a3 into a10, using a4 and
                a5 as scratch -- confirm.  */
 903         do_nsau a10, a3, a4, a5
 904         addi    a10, a10, -8            /* shift that brings the leading 1 to bit 23 */
 905         ssl     a10
 906         sll     a3, a3
 907         movi    a9, 1
 908         sub     a9, a9, a10             /* a9 = 1 - shift */
 909         j       .Ldiv_ynormalized
 910
 911 .Ldiv_yzero:
 912         /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
 913         slli    a4, a2, 1
 914         srli    a4, a4, 1               /* a4 = x with the sign bit cleared */
 915         srli    a2, a7, 31
 916         slli    a2, a2, 31              /* a2 = sign of the result only */
 917         or      a2, a2, a6              /* signed infinity */
 918         bnez    a4, 1f
 919         movi    a4, 0x400000    /* make it a quiet NaN */
 920         or      a2, a2, a4
 921 1:      leaf_return
 922
 923 .Ldiv_xexpzero:
             /* The exponent field of x is zero: x is zero or subnormal.  */
 924         /* Clear the sign bit of x.  */
 925         slli    a2, a2, 1
 926         srli    a2, a2, 1
 927
 928         /* If x is zero, return zero.  */
 929         beqz    a2, .Ldiv_return_zero
 930
 931         /* Normalize x.  Adjust the exponent in a8.  */
 932         do_nsau a10, a2, a4, a5
 933         addi    a10, a10, -8            /* shift that brings the leading 1 to bit 23 */
 934         ssl     a10
 935         sll     a2, a2
 936         movi    a8, 1
 937         sub     a8, a8, a10             /* a8 = 1 - shift */
 938         j       .Ldiv_xnormalized
 939
 940 .Ldiv_return_zero:
 941         /* Return zero with the appropriate sign bit.  */
 942         srli    a2, a7, 31
 943         slli    a2, a2, 31
 944         leaf_return
 945
 946 .Ldiv_xnan_or_inf:
             /* The exponent field of x is all ones: x is a NaN or an Infinity.  */
 947         /* Set the sign bit of the result.  */
 948         srli    a7, a3, 31
 949         slli    a7, a7, 31              /* a7 = sign bit of y only */
 950         xor     a2, a2, a7
 951         /* If y is NaN or Inf, return NaN.  */
             /* Otherwise x is returned as is, apart from the sign adjustment
                made above.  */
 952         bnall   a3, a6, 1f
 953         movi    a4, 0x400000    /* make it a quiet NaN */
 954         or      a2, a2, a4
 955 1:      leaf_return
 956
 957 .Ldiv_ynan_or_inf:
             /* The exponent field of y is all ones and that of x is not
                (x was checked first).  */
 958         /* If y is Infinity, return zero.  */
 959         slli    a8, a3, 9               /* a8 = mantissa bits of y */
 960         beqz    a8, .Ldiv_return_zero
 961         /* y is NaN; return it.  */
 962         mov     a2, a3
 963         leaf_return
 964
 965         .align  4
 966         .global __divsf3
 967         .type   __divsf3, @function
             /* __divsf3: single-precision division.
                In:   a2 = x (dividend), a3 = y (divisor), as 32-bit patterns.
                Out:  a2 = x / y.
                Register roles on the main path:
                  a6  = 0x7f800000 (exponent field mask)
                  a7  = x ^ y; only bit 31 (the result sign) is used
                  a8  = exponent of the result
                  a9  = exponent of y, later the loop counter
                  a10 = quotient being built
                  a2  = running remainder, a3 = divisor mantissa
                Special operands are handled by the code placed before this
                entry point.  */
 968 __divsf3:
 969         leaf_entry sp, 16
 970         movi    a6, 0x7f800000
 971
 972         /* Get the sign of the result.  */
 973         xor     a7, a2, a3
 974
 975         /* Check for NaN and infinity.  */
 976         ball    a2, a6, .Ldiv_xnan_or_inf
 977         ball    a3, a6, .Ldiv_ynan_or_inf
 978
 979         /* Extract the exponents.  */
 980         extui   a8, a2, 23, 8
 981         extui   a9, a3, 23, 8
 982
             /* A zero exponent field means zero or subnormal; the handlers
                either return directly or jump back to the labels below.  */
 983         beqz    a9, .Ldiv_yexpzero
 984 .Ldiv_ynormalized:
 985         beqz    a8, .Ldiv_xexpzero
 986 .Ldiv_xnormalized:
 987
 988         /* Subtract the exponents.  */
 989         sub     a8, a8, a9
 990
 991         /* Replace sign/exponent fields with explicit "1.0".  */
 992         movi    a10, 0xffffff
 993         or      a2, a2, a6
 994         and     a2, a2, a10
 995         or      a3, a3, a6
 996         and     a3, a3, a10
 997
 998         /* The first digit of the mantissa division must be a one.
 999            Shift x (and adjust the exponent) as needed to make this true.  */
1000         bltu    a3, a2, 1f
1001         slli    a2, a2, 1
1002         addi    a8, a8, -1
1003 1:
1004         /* Do the first subtraction and shift.  */
1005         sub     a2, a2, a3
1006         slli    a2, a2, 1
1007
1008         /* Put the quotient into a10.  */
1009         movi    a10, 1
1010
1011         /* Divide one bit at a time for 23 bits.  */
1012         movi    a9, 23
             /* With XCHAL_HAVE_LOOPS the "loop" instruction repeats the body
                a9 times; otherwise a9 is counted down by hand at the bottom.  */
1013 #if XCHAL_HAVE_LOOPS
1014         loop    a9, .Ldiv_loopend
1015 #endif
1016 .Ldiv_loop:
1017         /* Shift the quotient << 1.  */
1018         slli    a10, a10, 1
1019
1020         /* Is this digit a 0 or 1?  */
1021         bltu    a2, a3, 1f
1022
1023         /* Output a 1 and subtract.  */
1024         addi    a10, a10, 1
1025         sub     a2, a2, a3
1026
1027         /* Shift the dividend << 1.  */
1028 1:      slli    a2, a2, 1
1029
1030 #if !XCHAL_HAVE_LOOPS
1031         addi    a9, a9, -1
1032         bnez    a9, .Ldiv_loop
1033 #endif
1034 .Ldiv_loopend:
             /* a10 now holds 24 quotient bits (the leading 1 plus the 23 bits
                produced by the loop); a2 holds the remainder << 1.  */
1035
1036         /* Add the exponent bias (less one to account for the explicit "1.0"
1037            of the mantissa that will be added to the exponent in the final
1038            result).  */
1039         addi    a8, a8, 0x7e
1040
1041         /* Check for over/underflow.  The value in a8 is one less than the
1042            final exponent, so values in the range 0..fd are OK here.  */
1043         movi    a4, 0xfe
1044         bgeu    a8, a4, .Ldiv_overflow  /* unsigned: also catches negative a8 */
1045
1046 .Ldiv_round:
1047         /* Round.  The remainder (<< 1) is in a2.  */
1048         bltu    a2, a3, .Ldiv_rounded   /* less than half: keep the quotient */
1049         addi    a10, a10, 1
1050         beq     a2, a3, .Ldiv_exactlyhalf
1051
1052 .Ldiv_rounded:
1053         /* Add the exponent to the mantissa.  */
1054         slli    a8, a8, 23
1055         add     a2, a10, a8
1056
1057 .Ldiv_addsign:
1058         /* Add the sign bit.  */
1059         srli    a7, a7, 31
1060         slli    a7, a7, 31
1061         or      a2, a2, a7
1062         leaf_return
1063
1064 .Ldiv_overflow:
             /* Reached when a8 is outside 0..0xfd as an unsigned value;
                a negative a8 means underflow rather than overflow.  */
1065         bltz    a8, .Ldiv_underflow
1066         /* Return +/- Infinity.  */
1067         addi    a8, a4, 1       /* 0xff */
1068         slli    a2, a8, 23
1069         j       .Ldiv_addsign
1070
1071 .Ldiv_exactlyhalf:
1072         /* Remainder is exactly half the divisor.  Round even.  */
1073         srli    a10, a10, 1
1074         slli    a10, a10, 1             /* clear bit 0 of the quotient */
1075         j       .Ldiv_rounded
1076
1077 .Ldiv_underflow:
1078         /* Create a subnormal value, where the exponent field contains zero,
1079            but the effective exponent is 1.  The value of a8 is one less than
1080            the actual exponent, so just negate it to get the shift amount.  */
1081         neg     a8, a8
1082         ssr     a8
1083         bgeui   a8, 32, .Ldiv_flush_to_zero
1084
1085         /* Shift a10 right.  Any bits that are shifted out of a10 are
1086            saved in a6 for rounding the result.  */
1087         sll     a6, a10
1088         srl     a10, a10
1089
1090         /* Set the exponent to zero.  */
1091         movi    a8, 0
1092
1093         /* Pack any nonzero remainder (in a2) into a6.  */
1094         beqz    a2, 1f
1095         movi    a9, 1
1096         or      a6, a6, a9
1097
1098         /* Round a10 based on the bits shifted out into a6.  */
             /* Bit 31 of a6 is the first bit shifted out; the remaining bits
                of a6 decide between "above half" and "exactly half".  */
1099 1:      bgez    a6, .Ldiv_rounded
1100         addi    a10, a10, 1
1101         slli    a6, a6, 1
1102         bnez    a6, .Ldiv_rounded
1103         srli    a10, a10, 1
1104         slli    a10, a10, 1             /* exactly half: clear bit 0 (round to even) */
1105         j       .Ldiv_rounded
1106
1107 .Ldiv_flush_to_zero:
1108         /* Return zero with the appropriate sign bit.  */
1109         srli    a2, a7, 31
1110         slli    a2, a2, 31
1111         leaf_return
1112
1113 #endif /* L_divsf3 */
1114
1115 #ifdef L_cmpsf2
1116
1117         /* Equal and Not Equal */
             /* __eqsf2 / __nesf2 (the same entry point under two names).
                In:   a2 = x, a3 = y, as 32-bit patterns.
                Out:  a2 = 0 if x == y, 1 otherwise.  A NaN never compares
                      equal, and +0 and -0 compare equal.
                Scratch: a3, a6, a7.  */
1118
1119         .align  4
1120         .global __eqsf2
1121         .global __nesf2
1122         .set    __nesf2, __eqsf2
1123         .type   __eqsf2, @function
1124 __eqsf2:
1125         leaf_entry sp, 16
1126         bne     a2, a3, 4f
1127
1128         /* The values are equal but NaN != NaN.  Check the exponent.  */
1129         movi    a6, 0x7f800000
1130         ball    a2, a6, 3f
1131
1132         /* Equal.  */
1133         movi    a2, 0
1134         leaf_return
1135
1140         /* Check if the mantissas are nonzero.  */
1141 3:      slli    a7, a2, 9
1142         j       5f
1143
1144         /* Check if x and y are zero with different signs.  */
1145 4:      or      a7, a2, a3
1146         slli    a7, a7, 1
1147
1148         /* Equal if a7 == 0, where a7 is either (x | y) with the sign bit
1149            shifted out, or the mantissa of x when the exponent field of x
             is all ones and x == y.  */
1150 5:      movi    a2, 0
1151         movi    a3, 1
1152         movnez  a2, a3, a7
1153         leaf_return
1154
1155
1156         /* Greater Than */
             /* __gtsf2.
                In:   a2 = x, a3 = y, as 32-bit patterns.
                Out:  a2 = 0 if either operand is a NaN; otherwise control
                      passes to .Lle_cmp, which returns 1 if x > y and 0 if
                      x <= y.
                Scratch: a6, a7 (plus whatever .Lle_cmp uses).  */
1157
1158         .align  4
1159         .global __gtsf2
1160         .type   __gtsf2, @function
1161 __gtsf2:
1162         leaf_entry sp, 16
1163         movi    a6, 0x7f800000
1164         ball    a2, a6, 2f              /* exponent of x all ones: NaN or Inf */
1165 1:      bnall   a3, a6, .Lle_cmp        /* y is finite: do the comparison */
1166
1167         /* Check if y is a NaN.  */
1168         slli    a7, a3, 9               /* a7 = mantissa bits of y */
1169         beqz    a7, .Lle_cmp            /* y is an Infinity */
1170         movi    a2, 0
1171         leaf_return
1172
1173         /* Check if x is a NaN.  */
1174 2:      slli    a7, a2, 9               /* a7 = mantissa bits of x */
1175         beqz    a7, 1b                  /* x is an Infinity: go check y */
1176         movi    a2, 0
1177         leaf_return
1178
1179
1180         /* Less Than or Equal */
             /* __lesf2.
                In:   a2 = x, a3 = y, as 32-bit patterns.
                Out:  a2 = 1 if either operand is a NaN; otherwise 0 if
                      x <= y and 1 if x > y.
                Scratch: a3, a6, a7.
                The comparison tail .Lle_cmp is also the target of branches
                from __gtsf2.  */
1181
1182         .align  4
1183         .global __lesf2
1184         .type   __lesf2, @function
1185 __lesf2:
1186         leaf_entry sp, 16
1187         movi    a6, 0x7f800000
1188         ball    a2, a6, 2f              /* exponent of x all ones: NaN or Inf */
1189 1:      bnall   a3, a6, .Lle_cmp        /* y is finite: do the comparison */
1190
1191         /* Check if y is a NaN.  */
1192         slli    a7, a3, 9               /* a7 = mantissa bits of y */
1193         beqz    a7, .Lle_cmp            /* y is an Infinity */
1194         movi    a2, 1
1195         leaf_return
1196
1197         /* Check if x is a NaN.  */
1198 2:      slli    a7, a2, 9               /* a7 = mantissa bits of x */
1199         beqz    a7, 1b                  /* x is an Infinity: go check y */
1200         movi    a2, 1
1201         leaf_return
1202
1203 .Lle_cmp:
             /* Neither operand is a NaN here.  Returns 0 if x <= y, else 1.  */
1204         /* Check if x and y have different signs.  */
1205         xor     a7, a2, a3
1206         bltz    a7, .Lle_diff_signs
1207
1208         /* Check if x is negative.  */
1209         bltz    a2, .Lle_xneg
1210
1211         /* Check if x <= y.  */
             /* Both are non-negative, so the bit patterns order like the
                values under an unsigned comparison.  */
1212         bltu    a3, a2, 5f
1213 4:      movi    a2, 0
1214         leaf_return
1215
1216 .Lle_xneg:
1217         /* Check if y <= x.  */
             /* Both are negative, so the unsigned order of the bit patterns
                is the reverse of the order of the values.  */
1218         bgeu    a2, a3, 4b
1219 5:      movi    a2, 1
1220         leaf_return
1221
1222 .Lle_diff_signs:
             /* Signs differ.  If x is the negative one, x <= y holds.  */
1223         bltz    a2, 4b
1224
1225         /* Check if both x and y are zero.  */
             /* Here x has a clear sign bit and y a set one, so x > y unless
                both are zeros.  */
1226         or      a7, a2, a3
1227         slli    a7, a7, 1               /* drop the sign bit */
1228         movi    a2, 1
1229         movi    a3, 0
1230         moveqz  a2, a3, a7
1231         leaf_return
1232
1233
1234         /* Greater Than or Equal */
             /* __gesf2.
                In:   a2 = x, a3 = y, as 32-bit patterns.
                Out:  a2 = -1 if either operand is a NaN; otherwise control
                      passes to .Llt_cmp, which returns -1 if x < y and 0 if
                      x >= y.
                Scratch: a6, a7 (plus whatever .Llt_cmp uses).  */
1235
1236         .align  4
1237         .global __gesf2
1238         .type   __gesf2, @function
1239 __gesf2:
1240         leaf_entry sp, 16
1241         movi    a6, 0x7f800000
1242         ball    a2, a6, 2f              /* exponent of x all ones: NaN or Inf */
1243 1:      bnall   a3, a6, .Llt_cmp        /* y is finite: do the comparison */
1244
1245         /* Check if y is a NaN.  */
1246         slli    a7, a3, 9               /* a7 = mantissa bits of y */
1247         beqz    a7, .Llt_cmp            /* y is an Infinity */
1248         movi    a2, -1
1249         leaf_return
1250
1251         /* Check if x is a NaN.  */
1252 2:      slli    a7, a2, 9               /* a7 = mantissa bits of x */
1253         beqz    a7, 1b                  /* x is an Infinity: go check y */
1254         movi    a2, -1
1255         leaf_return
1256
1257
1258         /* Less Than */
             /* __ltsf2.
                In:   a2 = x, a3 = y, as 32-bit patterns.
                Out:  a2 = 0 if either operand is a NaN; otherwise -1 if
                      x < y and 0 if x >= y.
                Scratch: a3, a6, a7.
                The comparison tail .Llt_cmp is also the target of branches
                from __gesf2.  */
1259
1260         .align  4
1261         .global __ltsf2
1262         .type   __ltsf2, @function
1263 __ltsf2:
1264         leaf_entry sp, 16
1265         movi    a6, 0x7f800000
1266         ball    a2, a6, 2f              /* exponent of x all ones: NaN or Inf */
1267 1:      bnall   a3, a6, .Llt_cmp        /* y is finite: do the comparison */
1268
1269         /* Check if y is a NaN.  */
1270         slli    a7, a3, 9               /* a7 = mantissa bits of y */
1271         beqz    a7, .Llt_cmp            /* y is an Infinity */
1272         movi    a2, 0
1273         leaf_return
1274
1275         /* Check if x is a NaN.  */
1276 2:      slli    a7, a2, 9               /* a7 = mantissa bits of x */
1277         beqz    a7, 1b                  /* x is an Infinity: go check y */
1278         movi    a2, 0
1279         leaf_return
1280
1281 .Llt_cmp:
             /* Neither operand is a NaN here.  Returns -1 if x < y, else 0.  */
1282         /* Check if x and y have different signs.  */
1283         xor     a7, a2, a3
1284         bltz    a7, .Llt_diff_signs
1285
1286         /* Check if x is negative.  */
1287         bltz    a2, .Llt_xneg
1288
1289         /* Check if x < y.  */
             /* Both are non-negative, so the bit patterns order like the
                values under an unsigned comparison.  */
1290         bgeu    a2, a3, 5f
1291 4:      movi    a2, -1
1292         leaf_return
1293
1294 .Llt_xneg:
1295         /* Check if y < x.  */
             /* Both are negative, so the unsigned order of the bit patterns
                is the reverse of the order of the values.  */
1296         bltu    a3, a2, 4b
1297 5:      movi    a2, 0
1298         leaf_return
1299
1300 .Llt_diff_signs:
             /* Signs differ.  If x is the non-negative one, x < y is false.  */
1301         bgez    a2, 5b
1302
1303         /* Check if x and y are not both zero.  */
             /* Here x has a set sign bit and y a clear one, so x < y unless
                both are zeros.  */
1304         or      a7, a2, a3
1305         slli    a7, a7, 1               /* drop the sign bit */
1306         movi    a2, 0
1307         movi    a3, -1
1308         movnez  a2, a3, a7
1309         leaf_return
1310
1311
1312         /* Unordered */
             /* __unordsf2.
                In:   a2 = x, a3 = y, as 32-bit patterns.
                Out:  a2 = 1 if either operand is a NaN, 0 otherwise.
                Scratch: a6, a7.  */
1313
1314         .align  4
1315         .global __unordsf2
1316         .type   __unordsf2, @function
1317 __unordsf2:
1318         leaf_entry sp, 16
1319         movi    a6, 0x7f800000
1320         ball    a2, a6, 3f              /* exponent of x all ones: NaN or Inf */
1321 1:      ball    a3, a6, 4f              /* exponent of y all ones: NaN or Inf */
1322 2:      movi    a2, 0                   /* neither operand is a NaN */
1323         leaf_return
1324
             /* x is a NaN if its mantissa is nonzero; otherwise it is an
                Infinity and y still has to be checked.  */
1325 3:      slli    a7, a2, 9
1326         beqz    a7, 1b
1327         movi    a2, 1
1328         leaf_return
1329
             /* y is a NaN if its mantissa is nonzero.  */
1330 4:      slli    a7, a3, 9
1331         beqz    a7, 2b
1332         movi    a2, 1
1333         leaf_return
1334
1335 #endif /* L_cmpsf2 */
1336
1337 #ifdef L_fixsfsi
1338
1339         .align  4
1340         .global __fixsfsi
1341         .type   __fixsfsi, @function
             /* __fixsfsi: convert a single-precision value to a signed 32-bit
                integer, discarding the fraction bits.
                In:   a2 = x as a 32-bit pattern.
                Out:  a2 = integer result.  Magnitudes below 1 give 0.
                      Out-of-range values and Infinities give 0x7fffffff when
                      the sign bit is clear and 0x80000000 when it is set;
                      a NaN gives 0x7fffffff.
                Scratch: a4, a5, a6, a7.  */
1342 __fixsfsi:
1343         leaf_entry sp, 16
1344
1345         /* Check for NaN and Infinity.  */
1346         movi    a6, 0x7f800000
1347         ball    a2, a6, .Lfixsfsi_nan_or_inf
1348
1349         /* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
1350         extui   a4, a2, 23, 8
1351         addi    a4, a4, -0x7e
1352         bgei    a4, 32, .Lfixsfsi_maxint
1353         blti    a4, 1, .Lfixsfsi_zero
1354
1355         /* Add explicit "1.0" and shift << 8.  */
             /* a7 also keeps the sign bit of x for the final negation.  */
1356         or      a7, a2, a6
1357         slli    a5, a7, 8
1358
1359         /* Shift back to the right, based on the exponent.  */
1360         ssl     a4              /* shift by 32 - a4 */
1361         srl     a5, a5
1362
1363         /* Negate the result if sign != 0.  */
1364         neg     a2, a5
1365         movgez  a2, a5, a7
1366         leaf_return
1367
1368 .Lfixsfsi_nan_or_inf:
1369         /* Handle Infinity and NaN.  */
1370         slli    a4, a2, 9               /* a4 = mantissa bits of x */
1371         beqz    a4, .Lfixsfsi_maxint
1372
1373         /* Translate NaN to +maxint.  */
1374         movi    a2, 0                   /* makes the sign test below pick +maxint */
1375
1376 .Lfixsfsi_maxint:
1377         slli    a4, a6, 8       /* 0x80000000 */
1378         addi    a5, a4, -1      /* 0x7fffffff */
1379         movgez  a4, a5, a2              /* sign bit of a2 clear: use 0x7fffffff */
1380         mov     a2, a4
1381         leaf_return
1382
1383 .Lfixsfsi_zero:
1384         movi    a2, 0
1385         leaf_return
1386
1387 #endif /* L_fixsfsi */
1388
1389 #ifdef L_fixsfdi
1390
1391         .align  4
1392         .global __fixsfdi
1393         .type   __fixsfdi, @function
             /* __fixsfdi: convert a single-precision value to a signed 64-bit
                integer, discarding the fraction bits.
                In:   a2 = x as a 32-bit pattern.
                Out:  xh:xl = integer result (xh/xl are the endian-dependent
                      names for a2/a3 defined at the top of the file).
                      Magnitudes below 1 give 0.  Out-of-range values and
                      Infinities give 0x7fffffffffffffff when the sign bit is
                      clear and 0x8000000000000000 when it is set; a NaN
                      gives 0x7fffffffffffffff.
                Scratch: a4, a6, a7.  */
1394 __fixsfdi:
1395         leaf_entry sp, 16
1396
1397         /* Check for NaN and Infinity.  */
1398         movi    a6, 0x7f800000
1399         ball    a2, a6, .Lfixsfdi_nan_or_inf
1400
1401         /* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
1402         extui   a4, a2, 23, 8
1403         addi    a4, a4, -0x7e
1404         bgei    a4, 64, .Lfixsfdi_maxint
1405         blti    a4, 1, .Lfixsfdi_zero
1406
1407         /* Add explicit "1.0" and shift << 8.  */
             /* a7 also keeps the sign bit of x for the final negation.  */
1408         or      a7, a2, a6
1409         slli    xh, a7, 8
1410
1411         /* Shift back to the right, based on the exponent.  */
1412         ssl     a4              /* shift by 64 - a4 */
1413         bgei    a4, 32, .Lfixsfdi_smallshift
             /* The result fits in the low word.  */
1414         srl     xl, xh
1415         movi    xh, 0
1416
1417 .Lfixsfdi_shifted:
1418         /* Negate the result if sign != 0.  */
             /* 64-bit negation: negate both words, then borrow from the high
                word unless the low word is zero.  */
1419         bgez    a7, 1f
1420         neg     xl, xl
1421         neg     xh, xh
1422         beqz    xl, 1f
1423         addi    xh, xh, -1
1424 1:      leaf_return
1425
1426 .Lfixsfdi_smallshift:
             /* The result spans both words.  The low word takes the bits
                that the right shift of xh pushes out; "sll" writes xl
                without reading it, so xl needs no prior initialization.  */
1428         sll     xl, xh
1429         srl     xh, xh
1430         j       .Lfixsfdi_shifted
1431
1432 .Lfixsfdi_nan_or_inf:
1433         /* Handle Infinity and NaN.  */
1434         slli    a4, a2, 9               /* a4 = mantissa bits of x */
1435         beqz    a4, .Lfixsfdi_maxint
1436
1437         /* Translate NaN to +maxint.  */
1438         movi    a2, 0                   /* makes the sign test below pick +maxint */
1439
1440 .Lfixsfdi_maxint:
1441         slli    a7, a6, 8       /* 0x80000000 */
1442         bgez    a2, 1f
1443         mov     xh, a7
1444         movi    xl, 0
1445         leaf_return
1446
1447 1:      addi    xh, a7, -1      /* 0x7fffffff */
1448         movi    xl, -1
1449         leaf_return
1450
1451 .Lfixsfdi_zero:
1452         movi    xh, 0
1453         movi    xl, 0
1454         leaf_return
1455
1456 #endif /* L_fixsfdi */
1457
1458 #ifdef L_fixunssfsi
1459
1460         .align  4
1461         .global __fixunssfsi
1462         .type   __fixunssfsi, @function
             /* __fixunssfsi: convert a single-precision value to an unsigned
                32-bit integer, discarding the fraction bits.
                In:   a2 = x as a 32-bit pattern.
                Out:  a2 = integer result.  Magnitudes below 1 give 0; other
                      in-range values with the sign bit set give the negated
                      magnitude.  Out-of-range values and Infinities give
                      0xffffffff when the sign bit is clear and 0x80000000
                      when it is set; a NaN gives 0xffffffff.
                Scratch: a4, a5, a6, a7.  */
1463 __fixunssfsi:
1464         leaf_entry sp, 16
1465
1466         /* Check for NaN and Infinity.  */
1467         movi    a6, 0x7f800000
1468         ball    a2, a6, .Lfixunssfsi_nan_or_inf
1469
1470         /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
1471         extui   a4, a2, 23, 8
1472         addi    a4, a4, -0x7f
1473         bgei    a4, 32, .Lfixunssfsi_maxint
1474         bltz    a4, .Lfixunssfsi_zero
1475
1476         /* Add explicit "1.0" and shift << 8.  */
             /* a7 also keeps the sign bit of x for the final negation.  */
1477         or      a7, a2, a6
1478         slli    a5, a7, 8
1479
1480         /* Shift back to the right, based on the exponent.  */
1481         addi    a4, a4, 1
1482         beqi    a4, 32, .Lfixunssfsi_bigexp
1483         ssl     a4              /* shift by 32 - a4 */
1484         srl     a5, a5
1485
1486         /* Negate the result if sign != 0.  */
1487         neg     a2, a5
1488         movgez  a2, a5, a7
1489         leaf_return
1490
1491 .Lfixunssfsi_nan_or_inf:
1492         /* Handle Infinity and NaN.  */
1493         slli    a4, a2, 9               /* a4 = mantissa bits of x */
1494         beqz    a4, .Lfixunssfsi_maxint
1495
1496         /* Translate NaN to 0xffffffff.  */
1497         movi    a2, -1
1498         leaf_return
1499
1500 .Lfixunssfsi_maxint:
1501         slli    a4, a6, 8       /* 0x80000000 */
1502         movi    a5, -1          /* 0xffffffff */
1503         movgez  a4, a5, a2              /* sign bit of x clear: use 0xffffffff */
1504         mov     a2, a4
1505         leaf_return
1506
1507 .Lfixunssfsi_zero:
1508         movi    a2, 0
1509         leaf_return
1510
1511 .Lfixunssfsi_bigexp:
1512         /* Handle unsigned maximum exponent case.  */
             /* Here exp - 0x7f is 31, so the mantissa already sits in its
                final position in a5.  */
1513         bltz    a2, 1f
1514         mov     a2, a5          /* no shift needed */
1515         leaf_return
1516
1517         /* Return 0x80000000 if negative.  */
1518 1:      slli    a2, a6, 8
1519         leaf_return
1520
1521 #endif /* L_fixunssfsi */
1522
1523 #ifdef L_fixunssfdi
1524
1525         .align  4
1526         .global __fixunssfdi
1527         .type   __fixunssfdi, @function
             /* __fixunssfdi: convert a single-precision value to an unsigned
                64-bit integer, discarding the fraction bits.
                In:   a2 = x as a 32-bit pattern.
                Out:  xh:xl = integer result (xh/xl are the endian-dependent
                      names for a2/a3 defined at the top of the file).
                      Magnitudes below 1 give 0; other in-range values with
                      the sign bit set give the negated magnitude.
                      Out-of-range values and Infinities give all ones when
                      the sign bit is clear and 0x8000000000000000 when it is
                      set; a NaN gives all ones.
                Scratch: a4, a6, a7.  */
1528 __fixunssfdi:
1529         leaf_entry sp, 16
1530
1531         /* Check for NaN and Infinity.  */
1532         movi    a6, 0x7f800000
1533         ball    a2, a6, .Lfixunssfdi_nan_or_inf
1534
1535         /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
1536         extui   a4, a2, 23, 8
1537         addi    a4, a4, -0x7f
1538         bgei    a4, 64, .Lfixunssfdi_maxint
1539         bltz    a4, .Lfixunssfdi_zero
1540
1541         /* Add explicit "1.0" and shift << 8.  */
             /* a7 also keeps the sign bit of x for the final negation.  */
1542         or      a7, a2, a6
1543         slli    xh, a7, 8
1544
1545         /* Shift back to the right, based on the exponent.  */
1546         addi    a4, a4, 1
1547         beqi    a4, 64, .Lfixunssfdi_bigexp
1548         ssl     a4              /* shift by 64 - a4 */
1549         bgei    a4, 32, .Lfixunssfdi_smallshift
             /* The result fits in the low word.  */
1550         srl     xl, xh
1551         movi    xh, 0
1552
1553 .Lfixunssfdi_shifted:
1554         /* Negate the result if sign != 0.  */
             /* 64-bit negation: negate both words, then borrow from the high
                word unless the low word is zero.  */
1555         bgez    a7, 1f
1556         neg     xl, xl
1557         neg     xh, xh
1558         beqz    xl, 1f
1559         addi    xh, xh, -1
1560 1:      leaf_return
1561
1562 .Lfixunssfdi_smallshift:
             /* The result spans both words: shift the pair xh:0 right so the
                bits leaving xh land in xl.  */
1563         movi    xl, 0
1564         src     xl, xh, xl
1565         srl     xh, xh
1566         j       .Lfixunssfdi_shifted
1567
1568 .Lfixunssfdi_nan_or_inf:
1569         /* Handle Infinity and NaN.  */
1570         slli    a4, a2, 9               /* a4 = mantissa bits of x */
1571         beqz    a4, .Lfixunssfdi_maxint
1572
1573         /* Translate NaN to 0xffffffff.... */
1574 1:      movi    xh, -1
1575         movi    xl, -1
1576         leaf_return
1577
1578 .Lfixunssfdi_maxint:
             /* Sign bit clear: all ones (label 1 above).  Sign bit set:
                0x80000000 in the high word and zero in the low word.  */
1579         bgez    a2, 1b
1580 2:      slli    xh, a6, 8       /* 0x80000000 */
1581         movi    xl, 0
1582         leaf_return
1583
1584 .Lfixunssfdi_zero:
1585         movi    xh, 0
1586         movi    xl, 0
1587         leaf_return
1588
1589 .Lfixunssfdi_bigexp:
1590         /* Handle unsigned maximum exponent case.  */
             /* Here exp - 0x7f is 63, so xh already holds the high word of
                the result; only the low word has to be cleared.  */
1591         bltz    a7, 2b
1592         movi    xl, 0
1593         leaf_return             /* no shift needed */
1594
1595 #endif /* L_fixunssfdi */
1596
1597 #ifdef L_floatsisf
1598
1599         .align  4
1600         .global __floatunsisf
1601         .type   __floatunsisf, @function
             /* __floatunsisf: convert an unsigned 32-bit integer in a2 to
                single precision, returned in a2.  Zero is returned unchanged;
                anything else joins the __floatsisf code below with the sign
                (a7) forced to zero.  */
1602 __floatunsisf:
1603         leaf_entry sp, 16
1604         beqz    a2, .Lfloatsisf_return
1605
1606         /* Set the sign to zero and jump to the floatsisf code.  */
1607         movi    a7, 0
1608         j       .Lfloatsisf_normalize
1609
1610         .align  4
1611         .global __floatsisf
1612         .type   __floatsisf, @function
             /* __floatsisf: convert a signed 32-bit integer in a2 to single
                precision, returned in a2.  The result is rounded to nearest,
                with ties going to the even mantissa.
                Scratch: a4, a5, a6, a7.  */
1613 __floatsisf:
1614         leaf_entry sp, 16
1615
1616         /* Check for zero.  */
1617         beqz    a2, .Lfloatsisf_return
1618
1619         /* Save the sign.  */
1620         extui   a7, a2, 31, 1
1621
1622         /* Get the absolute value.  */
1623 #if XCHAL_HAVE_ABS
1624         abs     a2, a2
1625 #else
1626         neg     a4, a2
1627         movltz  a2, a4, a2
1628 #endif
1629
1630 .Lfloatsisf_normalize:
1631         /* Normalize with the first 1 bit in the msb.  */
             /* NOTE(review): do_nsau is defined elsewhere; presumably it puts
                the number of leading zero bits of a2 into a4, using a5 and
                a6 as scratch -- confirm.  */
1632         do_nsau a4, a2, a5, a6
1633         ssl     a4
1634         sll     a5, a2
1635
1636         /* Shift the mantissa into position, with rounding bits in a6.  */
1637         srli    a2, a5, 8
1638         slli    a6, a5, (32 - 8)
1639
1640         /* Set the exponent.  */
             /* The leading 1 of the mantissa is still present at bit 23, so
                the add below carries one into the exponent field; that is
                why the constant is based on 0x7e.  */
1641         movi    a5, 0x9d        /* 0x7e + 31 */
1642         sub     a5, a5, a4
1643         slli    a5, a5, 23
1644         add     a2, a2, a5
1645
1646         /* Add the sign.  */
1647         slli    a7, a7, 31
1648         or      a2, a2, a7
1649
1650         /* Round up if the leftover fraction is >= 1/2.  */
1651         bgez    a6, .Lfloatsisf_return
1652         addi    a2, a2, 1       /* Overflow to the exponent is OK.  */
1653
1654         /* Check if the leftover fraction is exactly 1/2.  */
1655         slli    a6, a6, 1
1656         beqz    a6, .Lfloatsisf_exactlyhalf
1657
1658 .Lfloatsisf_return:
1659         leaf_return
1660
1661 .Lfloatsisf_exactlyhalf:
1662         /* Round down to the nearest even value.  */
1663         srli    a2, a2, 1
1664         slli    a2, a2, 1               /* clear bit 0 */
1665         leaf_return
1666
1667 #endif /* L_floatsisf */
1668
1669 #ifdef L_floatdisf
1670
1671         .align  4
1672         .global __floatundisf
1673         .type   __floatundisf, @function
             /* __floatundisf: convert an unsigned 64-bit integer in xh:xl to
                single precision, returned in a2.  Zero is returned as is;
                anything else joins the __floatdisf code below with the sign
                (a7) forced to zero.  */
1674 __floatundisf:
1675         leaf_entry sp, 16
1676
1677         /* Check for zero.  */
1678         or      a4, xh, xl
1679         beqz    a4, 2f
1680
1681         /* Set the sign to zero and jump to the floatdisf code.  */
1682         movi    a7, 0
1683         j       .Lfloatdisf_normalize
1684
1685         .align  4
1686         .global __floatdisf
1687         .type   __floatdisf, @function
             /* __floatdisf: convert a signed 64-bit integer in xh:xl (the
                endian-dependent names for a2/a3 defined at the top of the
                file) to single precision, returned in a2.  The result is
                rounded to nearest, with ties going to the even mantissa.
                Scratch: a4, a5, a6, a7, and both input registers.  */
1688 __floatdisf:
1689         leaf_entry sp, 16
1690
1691         /* Check for zero.  */
1692         or      a4, xh, xl
1693         beqz    a4, 2f
1694
1695         /* Save the sign.  */
1696         extui   a7, xh, 31, 1
1697
1698         /* Get the absolute value.  */
             /* 64-bit negation: negate both words, then borrow from the high
                word unless the low word is zero.  */
1699         bgez    xh, .Lfloatdisf_normalize
1700         neg     xl, xl
1701         neg     xh, xh
1702         beqz    xl, .Lfloatdisf_normalize
1703         addi    xh, xh, -1
1704
1705 .Lfloatdisf_normalize:
1706         /* Normalize with the first 1 bit in the msb of xh.  */
             /* NOTE(review): do_nsau is defined elsewhere; presumably it puts
                the number of leading zero bits of its second operand into
                a4, using a5 and a6 as scratch -- confirm.  */
1707         beqz    xh, .Lfloatdisf_bigshift
1708         do_nsau a4, xh, a5, a6
1709         ssl     a4
1710         src     xh, xh, xl
1711         sll     xl, xl
1712
1713 .Lfloatdisf_shifted:
1714         /* Shift the mantissa into position, with rounding bits in a6.  */
             /* Bits of xl that do not fit into a6 are folded into bit 0 of
                a6 so that they still count when testing for an exact half.  */
1715         ssai    8
1716         sll     a5, xl
1717         src     a6, xh, xl
1718         srl     xh, xh
1719         beqz    a5, 1f
1720         movi    a5, 1
1721         or      a6, a6, a5
1722 1:
1723         /* Set the exponent.  */
             /* The leading 1 of the mantissa is still present at bit 23, so
                the add below carries one into the exponent field; that is
                why the constant is based on 0x7e.  */
1724         movi    a5, 0xbd        /* 0x7e + 63 */
1725         sub     a5, a5, a4
1726         slli    a5, a5, 23
1727         add     a2, xh, a5
1728
1729         /* Add the sign.  */
1730         slli    a7, a7, 31
1731         or      a2, a2, a7
1732
1733         /* Round up if the leftover fraction is >= 1/2.  */
1734         bgez    a6, 2f
1735         addi    a2, a2, 1       /* Overflow to the exponent is OK.  */
1736
1737         /* Check if the leftover fraction is exactly 1/2.  */
1738         slli    a6, a6, 1
1739         beqz    a6, .Lfloatdisf_exactlyhalf
1740 2:      leaf_return
1741
1742 .Lfloatdisf_bigshift:
1743         /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
1744         do_nsau a4, xl, a5, a6
1745         ssl     a4
1746         sll     xh, xl
1747         movi    xl, 0
1748         addi    a4, a4, 32              /* account for the 32 zero bits of xh */
1749         j       .Lfloatdisf_shifted
1750
1751 .Lfloatdisf_exactlyhalf:
1752         /* Round down to the nearest even value.  */
1753         srli    a2, a2, 1
1754         slli    a2, a2, 1               /* clear bit 0 */
1755         leaf_return
1756
1757 #endif /* L_floatdisf */