libgcc/config/xtensa/ieee754-sf.S

   1 /* IEEE-754 single-precision functions for Xtensa
   2    Copyright (C) 2006-2018 Free Software Foundation, Inc.
   3    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but WITHOUT
  13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15    License for more details.
  16
  17    Under Section 7 of GPL version 3, you are granted additional
  18    permissions described in the GCC Runtime Library Exception, version
  19    3.1, as published by the Free Software Foundation.
  20
  21    You should have received a copy of the GNU General Public License and
  22    a copy of the GCC Runtime Library Exception along with this program;
  23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  24    <http://www.gnu.org/licenses/>.  */
  25
  26 #ifdef __XTENSA_EB__
     /* Register aliases, judging by their names, for the high (xh, yh) and
        low (xl, yl) halves of operands kept in the register pairs a2/a3
        and a4/a5.  The assignment within each pair is swapped depending on
        whether __XTENSA_EB__ is defined (presumably the big-endian
        configuration -- confirm against the compiler's predefined
        macros).  */
  27 #define xh a2
  28 #define xl a3
  29 #define yh a4
  30 #define yl a5
  31 #else
  32 #define xh a3
  33 #define xl a2
  34 #define yh a5
  35 #define yl a4
  36 #endif
  37
  38 /*  Warning!  The branch displacements for some Xtensa branch instructions
  39     are quite small, and this code has been carefully laid out to keep
  40     branch targets in range.  If you change anything, be sure to check that
  41     the assembler is not relaxing anything to branch over a jump.  */
  42
  43 #ifdef L_negsf2
  44
     /* __negsf2: negate a single-precision value by inverting its sign bit.
        The value is read from a2 and the result is written back to a2;
        a4 is used as scratch.  There is no special-casing here, so NaNs,
        infinities and zeros simply get their sign bit inverted too.
        leaf_entry/leaf_return are macros defined outside this part of the
        file (presumably the ABI-specific prologue/epilogue).  */
  45         .align  4
  46         .global __negsf2
  47         .type   __negsf2, @function
  48 __negsf2:
  49         leaf_entry sp, 16
  50         movi    a4, 0x80000000  /* mask with only bit 31 (sign) set */
  51         xor     a2, a2, a4      /* toggle the sign bit */
  52         leaf_return
  53
  54 #endif /* L_negsf2 */
  55
  56 #ifdef L_addsubsf3
  57
  58         /* Addition */
  59 __addsf3_aux:
  60
  61         /* Handle NaNs and Infinities.  (This code is placed before the
  62            start of the function just to keep it in range of the limited
  63            branch displacements.)  */
             /* Register state at the labels below, as established by the
                code that branches here: a2 = x, a3 = y (its sign may
                already have been flipped when arriving via the subtraction
                code), a6 = 0x7f800000 (all exponent bits set).  The result
                is left in a2.  */
  64
  65 .Ladd_xnan_or_inf:
  66         /* If y is neither Infinity nor NaN, return x.  */
  67         bnall   a3, a6, .Ladd_return_nan_or_inf
  68         /* If x is a NaN, return it.  Otherwise, return y.  */
  69         slli    a7, a2, 9       /* drop sign+exponent; nonzero => NaN */
  70         bnez    a7, .Ladd_return_nan
  71
  72 .Ladd_ynan_or_inf:
  73         /* Return y.  */
  74         mov     a2, a3
  75
  76 .Ladd_return_nan_or_inf:
             /* a2 is an Infinity or a NaN.  Return an Infinity unchanged;
                send a NaN (nonzero mantissa) through the quieting code.  */
  77         slli    a7, a2, 9
  78         bnez    a7, .Ladd_return_nan
  79         leaf_return
  80
  81 .Ladd_return_nan:
  82         movi    a6, 0x400000    /* make it a quiet NaN */
  83         or      a2, a2, a6      /* set mantissa bit 22 */
  84         leaf_return
  85
  86 .Ladd_opposite_signs:
  87         /* Operand signs differ.  Do a subtraction.  */
  88         slli    a7, a6, 8       /* 0x7f800000 << 8 = 0x80000000 */
  89         xor     a3, a3, a7      /* flip the sign of y */
  90         j       .Lsub_same_sign
  91
  92         .align  4
  93         .global __addsf3
  94         .type   __addsf3, @function
     /* __addsf3: single-precision addition, x + y.
        In:  a2 = x, a3 = y.   Out: a2 = result.
        Register use in the main body:
          a6  = 0x7f800000, the exponent-field mask (not modified here)
          a7  = sign+exponent field (9 bits) of x
          a8  = sign+exponent field (9 bits) of y
          a9  = bits shifted out of the smaller operand, used for rounding
          a10 = scratch (exponent difference, extracted result exponent,
                saved lsb in the carry path)
        Operands with differing signs are handed to the subtraction code
        at .Lsub_same_sign after y's sign has been flipped.  The same-sign
        path is also entered at .Ladd_same_sign from .Lsub_opposite_signs.
        Rounding is to nearest, with ties going to the even value.  */
  95 __addsf3:
  96         leaf_entry sp, 16
  97         movi    a6, 0x7f800000
  98
  99         /* Check if the two operands have the same sign.  */
 100         xor     a7, a2, a3      /* bit 31 set iff the signs differ */
 101         bltz    a7, .Ladd_opposite_signs
 102
 103 .Ladd_same_sign:
 104         /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
 105         ball    a2, a6, .Ladd_xnan_or_inf
 106         ball    a3, a6, .Ladd_ynan_or_inf
 107
 108         /* Compare the exponents.  The smaller operand will be shifted
 109            right by the exponent difference and added to the larger
 110            one.  */
             /* The extracted 9-bit fields include the sign bit, which is
                the same for both operands on this path.  */
 111         extui   a7, a2, 23, 9
 112         extui   a8, a3, 23, 9
 113         bltu    a7, a8, .Ladd_shiftx
 114
 115 .Ladd_shifty:
 116         /* Check if the smaller (or equal) exponent is zero.  */
 117         bnone   a3, a6, .Ladd_yexpzero
 118
 119         /* Replace y sign/exponent with 0x008.  */
             /* That is: clear bits 31..24 and leave bit 23 set as the
                explicit "1.0".  */
 120         or      a3, a3, a6
 121         slli    a3, a3, 8
 122         srli    a3, a3, 8
 123
 124 .Ladd_yexpdiff:
 125         /* Compute the exponent difference.  */
 126         sub     a10, a7, a8
 127
 128         /* Exponent difference >= 32 -- just return the bigger value
                (x, which is still unmodified in a2).  */
 129         bgeui   a10, 32, 1f
 130
 131         /* Shift y right by the exponent difference.  Any bits that are
 132            shifted out of y are saved in a9 for rounding the result.  */
 133         ssr     a10
 134         movi    a9, 0
 135         src     a9, a3, a9
 136         srl     a3, a3
 137
 138         /* Do the addition.  */
 139         add     a2, a2, a3
 140
 141         /* Check if the add overflowed into the exponent.  */
 142         extui   a10, a2, 23, 9
 143         beq     a10, a7, .Ladd_round
 144         mov     a8, a7          /* .Ladd_carry expects the exponent in a8 */
 145         j       .Ladd_carry
 146
 147 .Ladd_yexpzero:
 148         /* y is a subnormal value.  Replace its sign/exponent with zero,
 149            i.e., no implicit "1.0", and increment the apparent exponent
 150            because subnormals behave as if they had the minimum (nonzero)
 151            exponent.  Test for the case when both exponents are zero.  */
 152         slli    a3, a3, 9
 153         srli    a3, a3, 9
 154         bnone   a2, a6, .Ladd_bothexpzero
 155         addi    a8, a8, 1
 156         j       .Ladd_yexpdiff
 157
 158 .Ladd_bothexpzero:
 159         /* Both exponents are zero.  Handle this as a special case.  There
 160            is no need to shift or round, and the normal code for handling
 161            a carry into the exponent field will not work because it
 162            assumes there is an implicit "1.0" that needs to be added.  */
 163         add     a2, a2, a3
 164 1:      leaf_return
 165
 166 .Ladd_xexpzero:
 167         /* Same as "yexpzero" except skip handling the case when both
 168            exponents are zero.  */
 169         slli    a2, a2, 9
 170         srli    a2, a2, 9
 171         addi    a7, a7, 1
 172         j       .Ladd_xexpdiff
 173
 174 .Ladd_shiftx:
 175         /* Same thing as the "shifty" code, but with x and y swapped.  Also,
 176            because the exponent difference is always nonzero in this version,
 177            the shift sequence can use SLL and skip loading a constant zero.  */
 178         bnone   a2, a6, .Ladd_xexpzero
 179
 180         or      a2, a2, a6
 181         slli    a2, a2, 8
 182         srli    a2, a2, 8
 183
 184 .Ladd_xexpdiff:
 185         sub     a10, a8, a7
             /* Exponent difference >= 32 -- x is too small to matter, so
                return y.  */
 186         bgeui   a10, 32, .Ladd_returny
 187
 188         ssr     a10
 189         sll     a9, a2          /* a9 = bits shifted out of x */
 190         srl     a2, a2
 191
 192         add     a2, a2, a3
 193
 194         /* Check if the add overflowed into the exponent.  */
 195         extui   a10, a2, 23, 9
 196         bne     a10, a8, .Ladd_carry
 197
 198 .Ladd_round:
 199         /* Round up if the leftover fraction is >= 1/2.  */
 200         bgez    a9, 1f          /* msb of a9 clear => fraction < 1/2 */
 201         addi    a2, a2, 1
 202
 203         /* Check if the leftover fraction is exactly 1/2.  */
 204         slli    a9, a9, 1
 205         beqz    a9, .Ladd_exactlyhalf
 206 1:      leaf_return
 207
 208 .Ladd_returny:
 209         mov     a2, a3
 210         leaf_return
 211
 212 .Ladd_carry:
 213         /* The addition has overflowed into the exponent field, so the
 214            value needs to be renormalized.  The mantissa of the result
 215            can be recovered by subtracting the original exponent and
 216            adding 0x800000 (which is the explicit "1.0" for the
 217            mantissa of the non-shifted operand -- the "1.0" for the
 218            shifted operand was already added).  The mantissa can then
 219            be shifted right by one bit.  The explicit "1.0" of the
 220            shifted mantissa then needs to be replaced by the exponent,
 221            incremented by one to account for the normalizing shift.
 222            It is faster to combine these operations: do the shift first
 223            and combine the additions and subtractions.  If x is the
 224            original exponent, the result is:
 225                shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
 226            or:
 227                shifted mantissa + ((x + 1) << 22)
 228            Note that the exponent is incremented here by leaving the
 229            explicit "1.0" of the mantissa in the exponent field.  */
 230
 231         /* Shift x right by one bit.  Save the lsb.  */
 232         mov     a10, a2
 233         srli    a2, a2, 1
 234
 235         /* See explanation above.  The original exponent is in a8.  */
 236         addi    a8, a8, 1
 237         slli    a8, a8, 22
 238         add     a2, a2, a8
 239
 240         /* Return an Infinity if the exponent overflowed.  */
 241         ball    a2, a6, .Ladd_infinity
 242
 243         /* Same thing as the "round" code except the msb of the leftover
 244            fraction is bit 0 of a10, with the rest of the fraction in a9.  */
 245         bbci.l  a10, 0, 1f
 246         addi    a2, a2, 1
 247         beqz    a9, .Ladd_exactlyhalf
 248 1:      leaf_return
 249
 250 .Ladd_infinity:
 251         /* Clear the mantissa.  */
 252         srli    a2, a2, 23
 253         slli    a2, a2, 23
 254
 255         /* The sign bit may have been lost in a carry-out.  Put it back.  */
             /* a8 holds (sign+exponent + 1) << 22 at this point; one more
                left shift moves the original sign bit into bit 31.  */
 256         slli    a8, a8, 1
 257         or      a2, a2, a8
 258         leaf_return
 259
 260 .Ladd_exactlyhalf:
 261         /* Round down to the nearest even value.  */
 262         srli    a2, a2, 1       /* clear bit 0 */
 263         slli    a2, a2, 1
 264         leaf_return
 265
 266
 267         /* Subtraction */
 268 __subsf3_aux:
 269
 270         /* Handle NaNs and Infinities.  (This code is placed before the
 271            start of the function just to keep it in range of the limited
 272            branch displacements.)  */
             /* Register state at the labels below, as established by the
                code that branches here: a2 = x, a3 = y (its sign may
                already have been flipped when arriving via the addition
                code), a6 = 0x7f800000 (all exponent bits set).  The result
                is left in a2.  */
 273
 274 .Lsub_xnan_or_inf:
 275         /* If y is neither Infinity nor NaN, return x.  */
 276         bnall   a3, a6, .Lsub_return_nan_or_inf
 277         /* Both x and y are either NaN or Inf, so the result is NaN.  */
 278
 279 .Lsub_return_nan:
 280         movi    a4, 0x400000    /* make it a quiet NaN */
 281         or      a2, a2, a4      /* set mantissa bit 22 */
 282         leaf_return
 283
 284 .Lsub_ynan_or_inf:
 285         /* Negate y and return it.  */
 286         slli    a7, a6, 8       /* 0x7f800000 << 8 = 0x80000000 */
 287         xor     a2, a3, a7
 288
 289 .Lsub_return_nan_or_inf:
             /* a2 is an Infinity or a NaN.  Return an Infinity unchanged;
                send a NaN (nonzero mantissa) through the quieting code.  */
 290         slli    a7, a2, 9       /* drop sign+exponent; nonzero => NaN */
 291         bnez    a7, .Lsub_return_nan
 292         leaf_return
 293
 294 .Lsub_opposite_signs:
 295         /* Operand signs differ.  Do an addition.  */
 296         slli    a7, a6, 8       /* a7 = 0x80000000, the sign bit */
 297         xor     a3, a3, a7      /* flip the sign of y */
 298         j       .Ladd_same_sign
 299
 300         .align  4
 301         .global __subsf3
 302         .type   __subsf3, @function
     /* __subsf3: single-precision subtraction, x - y.
        In:  a2 = x, a3 = y.   Out: a2 = result.
        Register use in the main body:
          a6  = 0x7f800000, the exponent-field mask (reused as the
                normalization shift count from .Lsub_borrow onward)
          a7  = exponent field (8 bits) of x
          a8  = exponent field (8 bits) of y (reused for the mantissa
                from .Lsub_borrow onward)
          a9  = bits shifted out of the smaller operand, used for rounding
          a10 = scratch (exponent difference, extracted result exponent)
          a11 = scratch
        Operands with differing signs are handed to the addition code at
        .Ladd_same_sign after y's sign has been flipped.  The same-sign
        path is also entered at .Lsub_same_sign from .Ladd_opposite_signs.
        Rounding is to nearest, with ties going to the even value.  */
 303 __subsf3:
 304         leaf_entry sp, 16
 305         movi    a6, 0x7f800000
 306
 307         /* Check if the two operands have the same sign.  */
 308         xor     a7, a2, a3      /* bit 31 set iff the signs differ */
 309         bltz    a7, .Lsub_opposite_signs
 310
 311 .Lsub_same_sign:
 312         /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
 313         ball    a2, a6, .Lsub_xnan_or_inf
 314         ball    a3, a6, .Lsub_ynan_or_inf
 315
 316         /* Compare the operands.  In contrast to addition, the entire
 317            value matters here.  */
             /* The signs are equal on this path, so the unsigned compare
                of the raw bit patterns orders the operands by magnitude.  */
 318         extui   a7, a2, 23, 8
 319         extui   a8, a3, 23, 8
 320         bltu    a2, a3, .Lsub_xsmaller
 321
 322 .Lsub_ysmaller:
 323         /* Check if the smaller (or equal) exponent is zero.  */
 324         bnone   a3, a6, .Lsub_yexpzero
 325
 326         /* Replace y sign/exponent with 0x008.  */
             /* That is: clear bits 31..24 and leave bit 23 set as the
                explicit "1.0".  */
 327         or      a3, a3, a6
 328         slli    a3, a3, 8
 329         srli    a3, a3, 8
 330
 331 .Lsub_yexpdiff:
 332         /* Compute the exponent difference.  */
 333         sub     a10, a7, a8
 334
 335         /* Exponent difference >= 32 -- just return the bigger value
                (x, which is still unmodified in a2).  */
 336         bgeui   a10, 32, 1f
 337
 338         /* Shift y right by the exponent difference.  Any bits that are
 339            shifted out of y are saved in a9 for rounding the result.  */
 340         ssr     a10
 341         movi    a9, 0
 342         src     a9, a3, a9
 343         srl     a3, a3
 344
 345         sub     a2, a2, a3
 346
 347         /* Subtract the leftover bits in a9 from zero and propagate any
 348            borrow from a2.  */
 349         neg     a9, a9
 350         addi    a10, a2, -1
 351         movnez  a2, a10, a9     /* a2 -= 1 only if a9 is nonzero */
 352
 353         /* Check if the subtract underflowed into the exponent.  */
 354         extui   a10, a2, 23, 8
 355         beq     a10, a7, .Lsub_round
 356         j       .Lsub_borrow
 357
 358 .Lsub_yexpzero:
 359         /* Return zero if the inputs are equal.  (For the non-subnormal
 360            case, subtracting the "1.0" will cause a borrow from the exponent
 361            and this case can be detected when handling the borrow.)  */
 362         beq     a2, a3, .Lsub_return_zero
 363
 364         /* y is a subnormal value.  Replace its sign/exponent with zero,
 365            i.e., no implicit "1.0".  Unless x is also a subnormal, increment
 366            y's apparent exponent because subnormals behave as if they had
 367            the minimum (nonzero) exponent.  */
 368         slli    a3, a3, 9
 369         srli    a3, a3, 9
 370         bnone   a2, a6, .Lsub_yexpdiff
 371         addi    a8, a8, 1
 372         j       .Lsub_yexpdiff
 373
 374 .Lsub_returny:
 375         /* Negate and return y.  */
 376         slli    a7, a6, 8       /* a7 = 0x80000000, the sign bit */
 377         xor     a2, a3, a7
 378 1:      leaf_return
 379
 380 .Lsub_xsmaller:
 381         /* Same thing as the "ysmaller" code, but with x and y swapped and
 382            with y negated.  */
 383         bnone   a2, a6, .Lsub_xexpzero
 384
 385         or      a2, a2, a6
 386         slli    a2, a2, 8
 387         srli    a2, a2, 8
 388
 389 .Lsub_xexpdiff:
 390         sub     a10, a8, a7
             /* Exponent difference >= 32 -- x is too small to matter, so
                return -y.  */
 391         bgeui   a10, 32, .Lsub_returny
 392
 393         ssr     a10
 394         movi    a9, 0
 395         src     a9, a2, a9
 396         srl     a2, a2
 397
 398         /* Negate y.  */
 399         slli    a11, a6, 8
 400         xor     a3, a3, a11
 401
 402         sub     a2, a3, a2
 403
             /* Propagate the borrow from the shifted-out bits, as in the
                "ysmaller" code.  */
 404         neg     a9, a9
 405         addi    a10, a2, -1
 406         movnez  a2, a10, a9
 407
 408         /* Check if the subtract underflowed into the exponent.  */
 409         extui   a10, a2, 23, 8
 410         bne     a10, a8, .Lsub_borrow
 411
 412 .Lsub_round:
 413         /* Round up if the leftover fraction is >= 1/2.  */
 414         bgez    a9, 1f          /* msb of a9 clear => fraction < 1/2 */
 415         addi    a2, a2, 1
 416
 417         /* Check if the leftover fraction is exactly 1/2.  */
 418         slli    a9, a9, 1
 419         beqz    a9, .Lsub_exactlyhalf
 420 1:      leaf_return
 421
 422 .Lsub_xexpzero:
 423         /* Same as "yexpzero".  */
 424         beq     a2, a3, .Lsub_return_zero
 425         slli    a2, a2, 9
 426         srli    a2, a2, 9
 427         bnone   a3, a6, .Lsub_xexpdiff
 428         addi    a7, a7, 1
 429         j       .Lsub_xexpdiff
 430
 431 .Lsub_return_zero:
 432         movi    a2, 0
 433         leaf_return
 434
 435 .Lsub_borrow:
 436         /* The subtraction has underflowed into the exponent field, so the
 437            value needs to be renormalized.  Shift the mantissa left as
 438            needed to remove any leading zeros and adjust the exponent
 439            accordingly.  If the exponent is not large enough to remove
 440            all the leading zeros, the result will be a subnormal value.  */
             /* On entry a10 holds the exponent field extracted from the
                result in a2.  do_nsau is a macro defined outside this part
                of the file; presumably it stores the number of leading
                zero bits of a8 in a6, using a7 and a11 as temporaries --
                confirm against its definition.  */
 441
 442         slli    a8, a2, 9       /* a8 = mantissa, left-justified */
 443         beqz    a8, .Lsub_xzero
 444         do_nsau a6, a8, a7, a11
 445         srli    a8, a8, 9
 446         bge     a6, a10, .Lsub_subnormal
 447         addi    a6, a6, 1
 448
 449 .Lsub_normalize_shift:
 450         /* Shift the mantissa (a8/a9) left by a6.  */
 451         ssl     a6
 452         src     a8, a8, a9
 453         sll     a9, a9
 454
 455         /* Combine the shifted mantissa with the sign and exponent,
 456            decrementing the exponent by a6.  (The exponent has already
 457            been decremented by one due to the borrow from the subtraction,
 458            but adding the mantissa will increment the exponent by one.)  */
 459         srli    a2, a2, 23
 460         sub     a2, a2, a6
 461         slli    a2, a2, 23
 462         add     a2, a2, a8
 463         j       .Lsub_round
 464
 465 .Lsub_exactlyhalf:
 466         /* Round down to the nearest even value.  */
 467         srli    a2, a2, 1       /* clear bit 0 */
 468         slli    a2, a2, 1
 469         leaf_return
 470
 471 .Lsub_xzero:
 472         /* If there was a borrow from the exponent, and the mantissa and
 473            guard digits are all zero, then the inputs were equal and the
 474            result should be zero.  */
 475         beqz    a9, .Lsub_return_zero
 476
 477         /* Only the guard digit is nonzero.  Shift by min(24, a10).  */
 478         addi    a11, a10, -24
 479         movi    a6, 24
 480         movltz  a6, a10, a11    /* a6 = a10 when a10 < 24 */
 481         j       .Lsub_normalize_shift
 482
 483 .Lsub_subnormal:
 484         /* The exponent is too small to shift away all the leading zeros.
 485            Set a6 to the current exponent (which has already been
 486            decremented by the borrow) so that the exponent of the result
 487            will be zero.  Do not add 1 to a6 in this case, because: (1)
 488            adding the mantissa will not increment the exponent, so there is
 489            no need to subtract anything extra from the exponent to
 490            compensate, and (2) the effective exponent of a subnormal is 1
 491            not 0 so the shift amount must be 1 smaller than normal. */
 492         mov     a6, a10
 493         j       .Lsub_normalize_shift
 494
 495 #endif /* L_addsubsf3 */
 496
 497 #ifdef L_mulsf3
 498
 499         /* Multiplication */
             /* XCHAL_NO_MUL is defined when none of the MUL16, MUL32 or
                MAC16 configuration options is available.  */
 500 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
 501 #define XCHAL_NO_MUL 1
 502 #endif
 503
 504         .literal_position
 505 __mulsf3_aux:
 506
 507         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
 508            (This code is placed before the start of the function just to
 509            keep it in range of the limited branch displacements.)  */
             /* Register state at the labels below, as established by
                __mulsf3 before it branches here: a2 = x, a3 = y,
                a6 = 0x7f800000 (all exponent bits set), a7 = x ^ y (bit 31
                is the sign of the result), and, for the "expzero" cases,
                a8/a9 = exponent fields of x/y.  do_nsau is a macro defined
                outside this part of the file; presumably it stores the
                number of leading zero bits of its second operand in its
                first operand, using the last two as temporaries --
                confirm against its definition.  */
 510
 511 .Lmul_xexpzero:
 512         /* Clear the sign bit of x.  */
 513         slli    a2, a2, 1
 514         srli    a2, a2, 1
 515
 516         /* If x is zero, return zero.  */
 517         beqz    a2, .Lmul_return_zero
 518
 519         /* Normalize x.  Adjust the exponent in a8.  */
 520         do_nsau a10, a2, a11, a12
 521         addi    a10, a10, -8
 522         ssl     a10
 523         sll     a2, a2
 524         movi    a8, 1
 525         sub     a8, a8, a10     /* a8 = 1 - shift count */
 526         j       .Lmul_xnormalized
 527
 528 .Lmul_yexpzero:
 529         /* Clear the sign bit of y.  */
 530         slli    a3, a3, 1
 531         srli    a3, a3, 1
 532
 533         /* If y is zero, return zero.  */
 534         beqz    a3, .Lmul_return_zero
 535
 536         /* Normalize y.  Adjust the exponent in a9.  */
 537         do_nsau a10, a3, a11, a12
 538         addi    a10, a10, -8
 539         ssl     a10
 540         sll     a3, a3
 541         movi    a9, 1
 542         sub     a9, a9, a10     /* a9 = 1 - shift count */
 543         j       .Lmul_ynormalized
 544
 545 .Lmul_return_zero:
 546         /* Return zero with the appropriate sign bit.  */
 547         srli    a2, a7, 31
 548         slli    a2, a2, 31
 549         j       .Lmul_done
 550
 551 .Lmul_xnan_or_inf:
 552         /* If y is zero, return NaN.  */
 553         slli    a8, a3, 1       /* ignore the sign bit of y */
 554         beqz    a8, .Lmul_return_nan
 555         /* If y is NaN, return y.  */
 556         bnall   a3, a6, .Lmul_returnx
 557         slli    a8, a3, 9       /* drop sign+exponent; zero => Infinity */
 558         beqz    a8, .Lmul_returnx
 559
 560 .Lmul_returny:
 561         mov     a2, a3
 562
 563 .Lmul_returnx:
             /* a2 is an Infinity or a NaN.  A NaN (nonzero mantissa) goes
                through the quieting code; an Infinity gets the sign of the
                result.  */
 564         slli    a8, a2, 9
 565         bnez    a8, .Lmul_return_nan
 566         /* Set the sign bit and return.  */
 567         extui   a7, a7, 31, 1   /* a7 = result sign in bit 0 */
 568         slli    a2, a2, 1       /* discard the old sign bit */
 569         ssai    1
 570         src     a2, a7, a2      /* shift the new sign in from the top */
 571         j       .Lmul_done
 572
 573 .Lmul_ynan_or_inf:
 574         /* If x is zero, return NaN.  */
 575         slli    a8, a2, 1       /* ignore the sign bit of x */
 576         bnez    a8, .Lmul_returny
 577         mov     a2, a3
 578
 579 .Lmul_return_nan:
 580         movi    a4, 0x400000    /* make it a quiet NaN */
 581         or      a2, a2, a4      /* set mantissa bit 22 */
 582         j       .Lmul_done
 583
 584         .align  4
 585         .global __mulsf3
 586         .type   __mulsf3, @function
     /* __mulsf3: single-precision multiplication, x * y.
        In:  a2 = x, a3 = y.   Out: a2 = result.
        Register use in the main body:
          a6  = 0x7f800000 (exponent mask) until the multiply, then the
                low word of the 64-bit product / leftover rounding bits
          a7  = x ^ y; bit 31 is the sign of the result
          a8  = exponent of x, then the running exponent of the result
          a9  = exponent of y, then scratch
          a4, a5, a10, a11 = scratch
        Zeros, subnormals, NaNs and Infinities are dispatched to the code
        placed just before this function.  Every exit goes through
        .Lmul_done so that, under the CALL0 ABI, a12-a15 are reloaded from
        the stack frame and the stack pointer is restored.  Rounding is to
        nearest, with ties going to the even value.  */
 587 __mulsf3:
 588 #if __XTENSA_CALL0_ABI__
             /* Frame layout: a12-a15 at sp+16..sp+28.  The slots at sp+0
                and sp+4 are used for a0 and a8 around the helper calls
                when there is no multiply hardware.  */
 589         leaf_entry sp, 32
 590         addi    sp, sp, -32
 591         s32i    a12, sp, 16
 592         s32i    a13, sp, 20
 593         s32i    a14, sp, 24
 594         s32i    a15, sp, 28
 595 #elif XCHAL_NO_MUL
 596         /* This is not really a leaf function; allocate enough stack space
 597            to allow CALL12s to a helper function.  */
 598         leaf_entry sp, 64
 599 #else
 600         leaf_entry sp, 32
 601 #endif
 602         movi    a6, 0x7f800000
 603
 604         /* Get the sign of the result.  */
 605         xor     a7, a2, a3
 606
 607         /* Check for NaN and infinity.  */
 608         ball    a2, a6, .Lmul_xnan_or_inf
 609         ball    a3, a6, .Lmul_ynan_or_inf
 610
 611         /* Extract the exponents.  */
 612         extui   a8, a2, 23, 8
 613         extui   a9, a3, 23, 8
 614
             /* A zero exponent field means zero or subnormal.  The
                out-of-line code either returns zero or normalizes the
                operand and jumps back to the "normalized" label.  */
 615         beqz    a8, .Lmul_xexpzero
 616 .Lmul_xnormalized:
 617         beqz    a9, .Lmul_yexpzero
 618 .Lmul_ynormalized:
 619
 620         /* Add the exponents.  */
 621         add     a8, a8, a9
 622
 623         /* Replace sign/exponent fields with explicit "1.0".  */
 624         movi    a10, 0xffffff
 625         or      a2, a2, a6      /* set bits 30..23 */
 626         and     a2, a2, a10     /* keep bits 23..0; bit 23 = "1.0" */
 627         or      a3, a3, a6
 628         and     a3, a3, a10
 629
 630         /* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */
 631
 632 #if XCHAL_HAVE_MUL32_HIGH
 633
 634         mull    a6, a2, a3
 635         muluh   a2, a2, a3
 636
 637 #else
 638
 639         /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
 640            products.  These partial products are:
 641
 642                 0 xl * yl
 643
 644                 1 xl * yh
 645                 2 xh * yl
 646
 647                 3 xh * yh
 648
 649            If using the Mul16 or Mul32 multiplier options, these input
 650            chunks must be stored in separate registers.  For Mac16, the
 651            UMUL.AA.* opcodes can specify that the inputs come from either
 652            half of the registers, so there is no need to shift them out
 653            ahead of time.  If there is no multiply hardware, the 16-bit
 654            chunks can be extracted when setting up the arguments to the
 655            separate multiply function.  */
 656
 657 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
 658         /* Calling a separate multiply function will clobber a0 and requires
 659            use of a8 as a temporary, so save those values now.  (The function
 660            uses a custom ABI so nothing else needs to be saved.)  */
 661         s32i    a0, sp, 0
 662         s32i    a8, sp, 4
 663 #endif
 664
 665 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
 666
 667 #define a2h a4
 668 #define a3h a5
 669
 670         /* Get the high halves of the inputs into registers.  */
 671         srli    a2h, a2, 16
 672         srli    a3h, a3, 16
 673
 674 #define a2l a2
 675 #define a3l a3
 676
 677 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
 678         /* Clear the high halves of the inputs.  This does not matter
 679            for MUL16 because the high bits are ignored.  */
 680         extui   a2, a2, 0, 16
 681         extui   a3, a3, 0, 16
 682 #endif
 683 #endif /* MUL16 || MUL32 */
 684
 685
             /* do_mul(dst, xreg, xhalf, yreg, yhalf) expands to the code
                that multiplies the "xhalf" (l or h) 16-bit half of xreg by
                the "yhalf" half of yreg and leaves the product in dst.
                One definition is selected per multiplier option.  */
 686 #if XCHAL_HAVE_MUL16
 687
 688 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 689         mul16u  dst, xreg ## xhalf, yreg ## yhalf
 690
 691 #elif XCHAL_HAVE_MUL32
 692
 693 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 694         mull    dst, xreg ## xhalf, yreg ## yhalf
 695
 696 #elif XCHAL_HAVE_MAC16
 697
 698 /* The preprocessor insists on inserting a space when concatenating after
 699    a period in the definition of do_mul below.  These macros are a workaround
 700    using underscores instead of periods when doing the concatenation.  */
 701 #define umul_aa_ll umul.aa.ll
 702 #define umul_aa_lh umul.aa.lh
 703 #define umul_aa_hl umul.aa.hl
 704 #define umul_aa_hh umul.aa.hh
 705
 706 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 707         umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
 708         rsr     dst, ACCLO
 709
 710 #else /* no multiply hardware */
 711
 712 #define set_arg_l(dst, src) \
 713         extui   dst, src, 0, 16
 714 #define set_arg_h(dst, src) \
 715         srli    dst, src, 16
 716
             /* With no multiply hardware each partial product is computed
                by calling the local helper .Lmul_mulsi3.  Under CALL0 the
                arguments go in a13/a14 and the result comes back in a12;
                otherwise CALL12 is used with the arguments in a14/a15 and
                the result read back from a14.  */
 717 #if __XTENSA_CALL0_ABI__
 718 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 719         set_arg_ ## xhalf (a13, xreg); \
 720         set_arg_ ## yhalf (a14, yreg); \
 721         call0   .Lmul_mulsi3; \
 722         mov     dst, a12
 723 #else
 724 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 725         set_arg_ ## xhalf (a14, xreg); \
 726         set_arg_ ## yhalf (a15, yreg); \
 727         call12  .Lmul_mulsi3; \
 728         mov     dst, a14
 729 #endif /* __XTENSA_CALL0_ABI__ */
 730
 731 #endif /* no multiply hardware */
 732
 733         /* Add pp1 and pp2 into a6 with carry-out in a9.  */
 734         do_mul(a6, a2, l, a3, h)        /* pp 1 */
 735         do_mul(a11, a2, h, a3, l)       /* pp 2 */
 736         movi    a9, 0
 737         add     a6, a6, a11
 738         bgeu    a6, a11, 1f     /* sum >= addend => no carry-out */
 739         addi    a9, a9, 1
 740 1:
 741         /* Shift the high half of a9/a6 into position in a9.  Note that
 742            this value can be safely incremented without any carry-outs.  */
 743         ssai    16
 744         src     a9, a9, a6
 745
 746         /* Compute the low word into a6.  */
 747         do_mul(a11, a2, l, a3, l)       /* pp 0 */
 748         sll     a6, a6
 749         add     a6, a6, a11
 750         bgeu    a6, a11, 1f     /* sum >= addend => no carry-out */
 751         addi    a9, a9, 1
 752 1:
 753         /* Compute the high word into a2.  */
 754         do_mul(a2, a2, h, a3, h)        /* pp 3 */
 755         add     a2, a2, a9
 756
 757 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
 758         /* Restore values saved on the stack during the multiplication.  */
 759         l32i    a0, sp, 0
 760         l32i    a8, sp, 4
 761 #endif
 762 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
 763
 764         /* Shift left by 9 bits, unless there was a carry-out from the
 765            multiply, in which case, shift by 8 bits and increment the
 766            exponent.  */
 767         movi    a4, 9
 768         srli    a5, a2, 24 - 9  /* nonzero iff product bit 47 is set */
 769         beqz    a5, 1f
 770         addi    a4, a4, -1
 771         addi    a8, a8, 1
 772 1:      ssl     a4
 773         src     a2, a2, a6
 774         sll     a6, a6
 775
 776         /* Subtract the extra bias from the exponent sum (plus one to account
 777            for the explicit "1.0" of the mantissa that will be added to the
 778            exponent in the final result).  */
 779         movi    a4, 0x80
 780         sub     a8, a8, a4
 781
 782         /* Check for over/underflow.  The value in a8 is one less than the
 783            final exponent, so values in the range 0..fd are OK here.  */
             /* The unsigned compare also catches negative values of a8
                (underflow); .Lmul_overflow tells the two cases apart.  */
 784         movi    a4, 0xfe
 785         bgeu    a8, a4, .Lmul_overflow
 786
 787 .Lmul_round:
 788         /* Round.  */
 789         bgez    a6, .Lmul_rounded       /* leftover fraction < 1/2 */
 790         addi    a2, a2, 1
 791         slli    a6, a6, 1
 792         beqz    a6, .Lmul_exactlyhalf   /* leftover was exactly 1/2 */
 793
 794 .Lmul_rounded:
 795         /* Add the exponent to the mantissa.  */
 796         slli    a8, a8, 23
 797         add     a2, a2, a8
 798
 799 .Lmul_addsign:
 800         /* Add the sign bit.  */
 801         srli    a7, a7, 31
 802         slli    a7, a7, 31
 803         or      a2, a2, a7
 804
 805 .Lmul_done:
             /* Common exit: under CALL0, reload a12-a15 and release the
                frame allocated at entry.  */
 806 #if __XTENSA_CALL0_ABI__
 807         l32i    a12, sp, 16
 808         l32i    a13, sp, 20
 809         l32i    a14, sp, 24
 810         l32i    a15, sp, 28
 811         addi    sp, sp, 32
 812 #endif
 813         leaf_return
 814
 815 .Lmul_exactlyhalf:
 816         /* Round down to the nearest even value.  */
 817         srli    a2, a2, 1       /* clear bit 0 */
 818         slli    a2, a2, 1
 819         j       .Lmul_rounded
 820
 821 .Lmul_overflow:
 822         bltz    a8, .Lmul_underflow
 823         /* Return +/- Infinity.  */
 824         movi    a8, 0xff
 825         slli    a2, a8, 23
 826         j       .Lmul_addsign
 827
 828 .Lmul_underflow:
 829         /* Create a subnormal value, where the exponent field contains zero,
 830            but the effective exponent is 1.  The value of a8 is one less than
 831            the actual exponent, so just negate it to get the shift amount.  */
 832         neg     a8, a8
 833         mov     a9, a6          /* keep the old leftover bits for below */
 834         ssr     a8
 835         bgeui   a8, 32, .Lmul_flush_to_zero
 836
 837         /* Shift a2 right.  Any bits that are shifted out of a2 are saved
 838            in a6 (combined with the shifted-out bits currently in a6) for
 839            rounding the result.  */
 840         sll     a6, a2
 841         srl     a2, a2
 842
 843         /* Set the exponent to zero.  */
 844         movi    a8, 0
 845
 846         /* Pack any nonzero bits shifted out into a6.  */
             /* If the previous leftover bits were nonzero, record that by
                setting bit 0 of a6, so the result is not mistaken for an
                exact tie when rounding.  */
 847         beqz    a9, .Lmul_round
 848         movi    a9, 1
 849         or      a6, a6, a9
 850         j       .Lmul_round
 851
 852 .Lmul_flush_to_zero:
 853         /* Return zero with the appropriate sign bit.  */
 854         srli    a2, a7, 31
 855         slli    a2, a2, 31
 856         j       .Lmul_done
 857
 858 #if XCHAL_NO_MUL
 859
 860         /* For Xtensa processors with no multiply hardware, this simplified
 861            version of _mulsi3 is used for multiplying 16-bit chunks of
 862            the floating-point mantissas.  When using CALL0, this function
 863            uses a custom ABI: the inputs are passed in a13 and a14, the
 864            result is returned in a12, and a8 and a15 are clobbered.  */
             /* Note that the input registers are modified as well: the
                loop shifts the first input right and the second input left
                as it goes.  Without CALL0, the function takes its inputs
                in a2 and a3, returns the product in a2, and uses a4, a5
                and a6 as temporaries.  */
 865         .align  4
 866 .Lmul_mulsi3:
 867         leaf_entry sp, 16
             /* mul_mulsi3_body: shift-and-add multiply, \dst = \src1 * \src2.
                Each iteration consumes the low four bits of \src1: for
                every set bit the correspondingly scaled \src2 is added to
                \dst.  Then \src1 is shifted right and \src2 left by four,
                until \src1 is zero.  do_addx2/4/8 are macros defined
                outside this part of the file; presumably they compute
                (\src2 * 2/4/8) + \dst into \tmp1 -- confirm against their
                definitions.  */
 868         .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
 869         movi    \dst, 0
 870 1:      add     \tmp1, \src2, \dst
 871         extui   \tmp2, \src1, 0, 1
 872         movnez  \dst, \tmp1, \tmp2
 873
 874         do_addx2 \tmp1, \src2, \dst, \tmp1
 875         extui   \tmp2, \src1, 1, 1
 876         movnez  \dst, \tmp1, \tmp2
 877
 878         do_addx4 \tmp1, \src2, \dst, \tmp1
 879         extui   \tmp2, \src1, 2, 1
 880         movnez  \dst, \tmp1, \tmp2
 881
 882         do_addx8 \tmp1, \src2, \dst, \tmp1
 883         extui   \tmp2, \src1, 3, 1
 884         movnez  \dst, \tmp1, \tmp2
 885
 886         srli    \src1, \src1, 4
 887         slli    \src2, \src2, 4
 888         bnez    \src1, 1b
 889         .endm
 890 #if __XTENSA_CALL0_ABI__
 891         mul_mulsi3_body a12, a13, a14, a15, a8
 892 #else
 893         /* The result will be written into a2, so save that argument in a4.  */
 894         mov     a4, a2
 895         mul_mulsi3_body a2, a4, a3, a5, a6
 896 #endif
 897         leaf_return
 898 #endif /* XCHAL_NO_MUL */
 899 #endif /* L_mulsf3 */
 900
 901 #ifdef L_divsf3
 902
 903         /* Division */
 904
 905 #if XCHAL_HAVE_FP_DIV
 906
             /* __divsf3, variant used when XCHAL_HAVE_FP_DIV is set.
                In:  a2 = dividend, a3 = divisor.   Out: a2 = quotient.
                The operands are copied into the floating-point registers
                f1 and f2, a fixed straight-line sequence of floating-point
                instructions leaves the quotient in f0, and f0 is copied
                back to a2.  f0-f8 are all written.  The sequence contains
                no branches, so any special operands are left to the
                floating-point instructions themselves.
                NOTE(review): the sequence looks like the usual
                seed-then-refine division (div0.s for an initial
                approximation, maddn.s steps to refine it, divn.s to
                finish) -- confirm the exact instruction semantics against
                the Xtensa ISA documentation before changing anything.  */
 907         .align  4
 908         .global __divsf3
 909         .type   __divsf3, @function
 910 __divsf3:
 911         leaf_entry      sp, 16
 912
 913         wfr             f1, a2  /* dividend */
 914         wfr             f2, a3  /* divisor */
 915
 916         div0.s          f3, f2
 917         nexp01.s        f4, f2
 918         const.s         f5, 1
 919         maddn.s         f5, f4, f3
 920         mov.s           f6, f3
 921         mov.s           f7, f2
 922         nexp01.s        f2, f1
 923         maddn.s         f6, f5, f6
 924         const.s         f5, 1
 925         const.s         f0, 0
 926         neg.s           f8, f2
 927         maddn.s         f5, f4, f6
 928         maddn.s         f0, f8, f3
 929         mkdadj.s        f7, f1
 930         maddn.s         f6, f5, f6
 931         maddn.s         f8, f4, f0
 932         const.s         f3, 1
 933         maddn.s         f3, f4, f6
 934         maddn.s         f0, f8, f6
 935         neg.s           f2, f2
 936         maddn.s         f6, f3, f6
 937         maddn.s         f2, f4, f0
 938         addexpm.s       f0, f7
 939         addexp.s        f6, f7
 940         divn.s          f0, f2, f6
 941
 942         rfr             a2, f0  /* move the quotient to the result register */
 943
 944         leaf_return
 945
 946 #else
 947
 948         .literal_position
 949 __divsf3_aux:
 950
 951         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
 952            (This code is placed before the start of the function just to
 953            keep it in range of the limited branch displacements.)  */
 954
 955 .Ldiv_yexpzero:        /* reached from __divsf3 when the exponent field of y (a9) is zero */
 956         /* Clear the sign bit of y.  */
 957         slli    a3, a3, 1
 958         srli    a3, a3, 1
 959
 960         /* Check for division by zero.  */
 961         beqz    a3, .Ldiv_yzero
 962
 963         /* Normalize y.  Adjust the exponent in a9.  */
 964         do_nsau a10, a3, a4, a5         /* presumably a10 = number of leading zero bits in a3, with a4/a5 as scratch (do_nsau is defined outside this chunk) */
 965         addi    a10, a10, -8            /* shift 8 less than that -- assumes the goal is a leading 1 in bit 23 */
 966         ssl     a10
 967         sll     a3, a3                  /* a3 <<= a10 */
 968         movi    a9, 1
 969         sub     a9, a9, a10             /* a9 = 1 - shift count */
 970         j       .Ldiv_ynormalized
 971
 972 .Ldiv_yzero:
 973         /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
 974         slli    a4, a2, 1
 975         srli    a4, a4, 1               /* a4 = x with its sign bit cleared */
 976         srli    a2, a7, 31
 977         slli    a2, a2, 31              /* a2 = result sign only (a7 = x ^ y from __divsf3) */
 978         or      a2, a2, a6              /* add the all-ones exponent held in a6: signed infinity */
 979         bnez    a4, 1f                  /* x nonzero: return the infinity */
 980         movi    a4, 0x400000    /* make it a quiet NaN */
 981         or      a2, a2, a4
 982 1:      leaf_return
 983
 984 .Ldiv_xexpzero:        /* reached from __divsf3 when the exponent field of x (a8) is zero */
 985         /* Clear the sign bit of x.  */
 986         slli    a2, a2, 1
 987         srli    a2, a2, 1
 988
 989         /* If x is zero, return zero.  */
 990         beqz    a2, .Ldiv_return_zero
 991
 992         /* Normalize x.  Adjust the exponent in a8.  */
 993         do_nsau a10, a2, a4, a5         /* same normalization as for y above, applied to a2 */
 994         addi    a10, a10, -8
 995         ssl     a10
 996         sll     a2, a2                  /* a2 <<= a10 */
 997         movi    a8, 1
 998         sub     a8, a8, a10             /* a8 = 1 - shift count */
 999         j       .Ldiv_xnormalized
1000
1001 .Ldiv_return_zero:
1002         /* Return zero with the appropriate sign bit.  */
1003         srli    a2, a7, 31
1004         slli    a2, a2, 31              /* keep only bit 31 of a7 */
1005         leaf_return
1006
1007 .Ldiv_xnan_or_inf:     /* reached when every exponent bit of x is set */
1008         /* Set the sign bit of the result.  */
1009         srli    a7, a3, 31
1010         slli    a7, a7, 31              /* a7 = sign bit of y only */
1011         xor     a2, a2, a7              /* flip the sign of x when y is negative */
1012         /* If y is NaN or Inf, return NaN.  */
1013         ball    a3, a6, .Ldiv_return_nan
1014         slli    a7, a2, 9               /* a7 = mantissa bits of x */
1015         bnez    a7, .Ldiv_return_nan    /* nonzero mantissa: x is a NaN */
1016         leaf_return                     /* x is an infinity: return it with the combined sign */
1017
1018 .Ldiv_ynan_or_inf:     /* reached when every exponent bit of y is set and x is neither NaN nor Inf */
1019         /* If y is Infinity, return zero.  */
1020         slli    a8, a3, 9               /* a8 = mantissa bits of y */
1021         beqz    a8, .Ldiv_return_zero
1022         /* y is NaN; return it.  */
1023         mov     a2, a3
1024
1025 .Ldiv_return_nan:
1026         movi    a4, 0x400000    /* make it a quiet NaN */
1027         or      a2, a2, a4
1028         leaf_return
1029
1030         .align  4
1031         .global __divsf3
1032         .type   __divsf3, @function
1033 __divsf3:      /* integer-only variant: divides the value in a2 (x) by the value in a3 (y); result bits returned in a2 */
1034         leaf_entry sp, 16
1035         movi    a6, 0x7f800000          /* exponent-field mask; also used by the special-case code */
1036
1037         /* Get the sign of the result.  */
1038         xor     a7, a2, a3              /* only bit 31 of a7 is used later */
1039
1040         /* Check for NaN and infinity.  */
1041         ball    a2, a6, .Ldiv_xnan_or_inf       /* all exponent bits of x set */
1042         ball    a3, a6, .Ldiv_ynan_or_inf       /* all exponent bits of y set */
1043
1044         /* Extract the exponents.  */
1045         extui   a8, a2, 23, 8           /* a8 = exponent field of x */
1046         extui   a9, a3, 23, 8           /* a9 = exponent field of y */
1047
1048         beqz    a9, .Ldiv_yexpzero      /* y is zero or subnormal */
1049 .Ldiv_ynormalized:
1050         beqz    a8, .Ldiv_xexpzero      /* x is zero or subnormal */
1051 .Ldiv_xnormalized:
1052
1053         /* Subtract the exponents.  */
1054         sub     a8, a8, a9
1055
1056         /* Replace sign/exponent fields with explicit "1.0".  */
1057         movi    a10, 0xffffff
1058         or      a2, a2, a6              /* set bits 23..30 ... */
1059         and     a2, a2, a10             /* ... then keep only bits 0..23 */
1060         or      a3, a3, a6
1061         and     a3, a3, a10
1062
1063         /* The first digit of the mantissa division must be a one.
1064            Shift x (and adjust the exponent) as needed to make this true.  */
1065         bltu    a3, a2, 1f              /* y < x: no shift needed */
1066         slli    a2, a2, 1
1067         addi    a8, a8, -1
1068 1:
1069         /* Do the first subtraction and shift.  */
1070         sub     a2, a2, a3
1071         slli    a2, a2, 1
1072
1073         /* Put the quotient into a10.  */
1074         movi    a10, 1
1075
1076         /* Divide one bit at a time for 23 bits.  */
1077         movi    a9, 23                  /* iteration count (a9 no longer holds y's exponent) */
1078 #if XCHAL_HAVE_LOOPS
1079         loop    a9, .Ldiv_loopend       /* loop instruction replaces the explicit countdown used below when loops are unavailable */
1080 #endif
1081 .Ldiv_loop:
1082         /* Shift the quotient << 1.  */
1083         slli    a10, a10, 1
1084
1085         /* Is this digit a 0 or 1?  */
1086         bltu    a2, a3, 1f              /* remainder < divisor: digit is 0 */
1087
1088         /* Output a 1 and subtract.  */
1089         addi    a10, a10, 1
1090         sub     a2, a2, a3
1091
1092         /* Shift the dividend << 1.  */
1093 1:      slli    a2, a2, 1
1094
1095 #if !XCHAL_HAVE_LOOPS
1096         addi    a9, a9, -1
1097         bnez    a9, .Ldiv_loop
1098 #endif
1099 .Ldiv_loopend:
1100
1101         /* Add the exponent bias (less one to account for the explicit "1.0"
1102            of the mantissa that will be added to the exponent in the final
1103            result).  */
1104         addi    a8, a8, 0x7e
1105
1106         /* Check for over/underflow.  The value in a8 is one less than the
1107            final exponent, so values in the range 0..fd are OK here.  */
1108         movi    a4, 0xfe
1109         bgeu    a8, a4, .Ldiv_overflow  /* unsigned compare, so negative a8 is caught here too */
1110
1111 .Ldiv_round:
1112         /* Round.  The remainder (<< 1) is in a2.  */
1113         bltu    a2, a3, .Ldiv_rounded   /* less than half the divisor: round down */
1114         addi    a10, a10, 1
1115         beq     a2, a3, .Ldiv_exactlyhalf
1116
1117 .Ldiv_rounded:
1118         /* Add the exponent to the mantissa.  */
1119         slli    a8, a8, 23
1120         add     a2, a10, a8
1121
1122 .Ldiv_addsign:
1123         /* Add the sign bit.  */
1124         srli    a7, a7, 31
1125         slli    a7, a7, 31              /* keep only bit 31 of x ^ y */
1126         or      a2, a2, a7
1127         leaf_return
1128
1129 .Ldiv_overflow:
1130         bltz    a8, .Ldiv_underflow     /* negative exponent is really an underflow */
1131         /* Return +/- Infinity.  */
1132         addi    a8, a4, 1       /* 0xff */
1133         slli    a2, a8, 23
1134         j       .Ldiv_addsign
1135
1136 .Ldiv_exactlyhalf:
1137         /* Remainder is exactly half the divisor.  Round even.  */
1138         srli    a10, a10, 1
1139         slli    a10, a10, 1             /* clear bit 0 of the quotient */
1140         j       .Ldiv_rounded
1141
1142 .Ldiv_underflow:
1143         /* Create a subnormal value, where the exponent field contains zero,
1144            but the effective exponent is 1.  The value of a8 is one less than
1145            the actual exponent, so just negate it to get the shift amount.  */
1146         neg     a8, a8
1147         ssr     a8
1148         bgeui   a8, 32, .Ldiv_flush_to_zero     /* shifting by 32 or more leaves nothing */
1149
1150         /* Shift a10 right.  Any bits that are shifted out of a10 are
1151            saved in a6 for rounding the result.  */
1152         sll     a6, a10
1153         srl     a10, a10
1154
1155         /* Set the exponent to zero.  */
1156         movi    a8, 0
1157
1158         /* Pack any nonzero remainder (in a2) into a6.  */
1159         beqz    a2, 1f
1160         movi    a9, 1
1161         or      a6, a6, a9              /* set bit 0 of a6 to record the nonzero remainder */
1162
1163         /* Round a10 based on the bits shifted out into a6.  */
1164 1:      bgez    a6, .Ldiv_rounded       /* top bit clear: below one half, round down */
1165         addi    a10, a10, 1
1166         slli    a6, a6, 1
1167         bnez    a6, .Ldiv_rounded       /* bits below the top one set: above one half, keep the increment */
1168         srli    a10, a10, 1
1169         slli    a10, a10, 1             /* exactly one half: clear bit 0 (round to even) */
1170         j       .Ldiv_rounded
1171
1172 .Ldiv_flush_to_zero:
1173         /* Return zero with the appropriate sign bit.  */
1174         srli    a2, a7, 31
1175         slli    a2, a2, 31
1176         leaf_return
1177
1178 #endif /* XCHAL_HAVE_FP_DIV */
1179
1180 #endif /* L_divsf3 */
1181
1182 #ifdef L_cmpsf2
1183
1184         /* Equal and Not Equal: a2 = 0 when x (a2) equals y (a3), 1 otherwise.  A NaN is unequal to everything; +0 equals -0.  */
1185
1186         .align  4
1187         .global __eqsf2
1188         .global __nesf2
1189         .set    __nesf2, __eqsf2        /* both names refer to the same code */
1190         .type   __eqsf2, @function
1191 __eqsf2:
1192         leaf_entry sp, 16
1193         bne     a2, a3, 4f              /* bit patterns differ: only +0 vs -0 can still compare equal */
1194
1195         /* The values are equal but NaN != NaN.  Check the exponent.  */
1196         movi    a6, 0x7f800000
1197         ball    a2, a6, 3f              /* all exponent bits set: Infinity or NaN */
1198
1199         /* Equal.  */
1200         movi    a2, 0
1201         leaf_return
1202
1207         /* Check if the mantissas are nonzero.  */
1208 3:      slli    a7, a2, 9               /* a7 = mantissa bits of x; nonzero means NaN */
1209         j       5f
1210
1211         /* Check if x and y are zero with different signs.  */
1212 4:      or      a7, a2, a3
1213         slli    a7, a7, 1               /* drop the sign bit; zero only when both operands are +/-0 */
1214
1215         /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1216            of x when the exponent field of x is all ones and x == y.  */
1217 5:      movi    a2, 0
1218         movi    a3, 1
1219         movnez  a2, a3, a7              /* a2 = 1 when a7 is nonzero */
1220         leaf_return
1221
1222
1223         /* Greater Than: returns 0 when either operand is a NaN; otherwise the result comes from .Lle_cmp.  */
1224
1225         .align  4
1226         .global __gtsf2
1227         .type   __gtsf2, @function
1228 __gtsf2:
1229         leaf_entry sp, 16
1230         movi    a6, 0x7f800000          /* exponent-field mask */
1231         ball    a2, a6, 2f              /* all exponent bits of x set: Infinity or NaN */
1232 1:      bnall   a3, a6, .Lle_cmp        /* y is an ordinary number: do the comparison */
1233
1234         /* Check if y is a NaN.  */
1235         slli    a7, a3, 9               /* a7 = mantissa bits of y */
1236         beqz    a7, .Lle_cmp            /* zero mantissa: y is an Infinity, still comparable */
1237         movi    a2, 0
1238         leaf_return
1239
1240         /* Check if x is a NaN.  */
1241 2:      slli    a7, a2, 9               /* a7 = mantissa bits of x */
1242         beqz    a7, 1b                  /* x is an Infinity: go on to examine y */
1243         movi    a2, 0
1244         leaf_return
1245
1246
1247         /* Less Than or Equal: returns 1 when either operand is a NaN; otherwise the result comes from .Lle_cmp.  */
1248
1249         .align  4
1250         .global __lesf2
1251         .type   __lesf2, @function
1252 __lesf2:
1253         leaf_entry sp, 16
1254         movi    a6, 0x7f800000          /* exponent-field mask */
1255         ball    a2, a6, 2f              /* all exponent bits of x set: Infinity or NaN */
1256 1:      bnall   a3, a6, .Lle_cmp        /* y is an ordinary number: do the comparison */
1257
1258         /* Check if y is a NaN.  */
1259         slli    a7, a3, 9               /* a7 = mantissa bits of y */
1260         beqz    a7, .Lle_cmp            /* zero mantissa: y is an Infinity, still comparable */
1261         movi    a2, 1
1262         leaf_return
1263
1264         /* Check if x is a NaN.  */
1265 2:      slli    a7, a2, 9               /* a7 = mantissa bits of x */
1266         beqz    a7, 1b                  /* x is an Infinity: go on to examine y */
1267         movi    a2, 1
1268         leaf_return
1269
1270 .Lle_cmp:      /* shared by __gtsf2 and __lesf2; neither operand is a NaN here.  Returns 0 when x <= y, 1 when x > y */
1271         /* Check if x and y have different signs.  */
1272         xor     a7, a2, a3
1273         bltz    a7, .Lle_diff_signs
1274
1275         /* Check if x is negative.  */
1276         bltz    a2, .Lle_xneg
1277
1278         /* Check if x <= y.  */
1279         bltu    a3, a2, 5f              /* both non-negative: the bit patterns order like the values */
1280 4:      movi    a2, 0
1281         leaf_return
1282
1283 .Lle_xneg:
1284         /* Check if y <= x.  */
1285         bgeu    a2, a3, 4b              /* both negative: the larger bit pattern is the smaller value, so this means x <= y */
1286 5:      movi    a2, 1
1287         leaf_return
1288
1289 .Lle_diff_signs:
1290         bltz    a2, 4b                  /* x negative and y not: x <= y */
1291
1292         /* Check if both x and y are zero.  */
1293         or      a7, a2, a3
1294         slli    a7, a7, 1               /* drop the sign bit */
1295         movi    a2, 1
1296         movi    a3, 0
1297         moveqz  a2, a3, a7              /* +0 versus -0 counts as equal: return 0 */
1298         leaf_return
1299
1300
1301         /* Greater Than or Equal: returns -1 when either operand is a NaN; otherwise the result comes from .Llt_cmp.  */
1302
1303         .align  4
1304         .global __gesf2
1305         .type   __gesf2, @function
1306 __gesf2:
1307         leaf_entry sp, 16
1308         movi    a6, 0x7f800000          /* exponent-field mask */
1309         ball    a2, a6, 2f              /* all exponent bits of x set: Infinity or NaN */
1310 1:      bnall   a3, a6, .Llt_cmp        /* y is an ordinary number: do the comparison */
1311
1312         /* Check if y is a NaN.  */
1313         slli    a7, a3, 9               /* a7 = mantissa bits of y */
1314         beqz    a7, .Llt_cmp            /* zero mantissa: y is an Infinity, still comparable */
1315         movi    a2, -1
1316         leaf_return
1317
1318         /* Check if x is a NaN.  */
1319 2:      slli    a7, a2, 9               /* a7 = mantissa bits of x */
1320         beqz    a7, 1b                  /* x is an Infinity: go on to examine y */
1321         movi    a2, -1
1322         leaf_return
1323
1324
1325         /* Less Than: returns 0 when either operand is a NaN; otherwise the result comes from .Llt_cmp.  */
1326
1327         .align  4
1328         .global __ltsf2
1329         .type   __ltsf2, @function
1330 __ltsf2:
1331         leaf_entry sp, 16
1332         movi    a6, 0x7f800000          /* exponent-field mask */
1333         ball    a2, a6, 2f              /* all exponent bits of x set: Infinity or NaN */
1334 1:      bnall   a3, a6, .Llt_cmp        /* y is an ordinary number: do the comparison */
1335
1336         /* Check if y is a NaN.  */
1337         slli    a7, a3, 9               /* a7 = mantissa bits of y */
1338         beqz    a7, .Llt_cmp            /* zero mantissa: y is an Infinity, still comparable */
1339         movi    a2, 0
1340         leaf_return
1341
1342         /* Check if x is a NaN.  */
1343 2:      slli    a7, a2, 9               /* a7 = mantissa bits of x */
1344         beqz    a7, 1b                  /* x is an Infinity: go on to examine y */
1345         movi    a2, 0
1346         leaf_return
1347
1348 .Llt_cmp:      /* shared by __gesf2 and __ltsf2; neither operand is a NaN here.  Returns -1 when x < y, 0 when x >= y */
1349         /* Check if x and y have different signs.  */
1350         xor     a7, a2, a3
1351         bltz    a7, .Llt_diff_signs
1352
1353         /* Check if x is negative.  */
1354         bltz    a2, .Llt_xneg
1355
1356         /* Check if x < y.  */
1357         bgeu    a2, a3, 5f              /* both non-negative: the bit patterns order like the values */
1358 4:      movi    a2, -1
1359         leaf_return
1360
1361 .Llt_xneg:
1362         /* Check if y < x.  */
1363         bltu    a3, a2, 4b              /* both negative: the larger bit pattern is the smaller value, so this means x < y */
1364 5:      movi    a2, 0
1365         leaf_return
1366
1367 .Llt_diff_signs:
1368         bgez    a2, 5b                  /* x non-negative and y negative: x >= y */
1369
1370         /* x is negative and y is not: x < y unless both are zero.  */
1371         or      a7, a2, a3
1372         slli    a7, a7, 1               /* drop the sign bit */
1373         movi    a2, 0
1374         movi    a3, -1
1375         movnez  a2, a3, a7              /* anything other than -0 versus +0: return -1 */
1376         leaf_return
1377
1378
1379         /* Unordered: returns 1 when x (a2) or y (a3) is a NaN, 0 otherwise.  */
1380
1381         .align  4
1382         .global __unordsf2
1383         .type   __unordsf2, @function
1384 __unordsf2:
1385         leaf_entry sp, 16
1386         movi    a6, 0x7f800000          /* exponent-field mask */
1387         ball    a2, a6, 3f              /* all exponent bits of x set: Infinity or NaN */
1388 1:      ball    a3, a6, 4f              /* all exponent bits of y set: Infinity or NaN */
1389 2:      movi    a2, 0
1390         leaf_return
1391
1392 3:      slli    a7, a2, 9               /* a7 = mantissa bits of x */
1393         beqz    a7, 1b                  /* x is an Infinity: go on to examine y */
1394         movi    a2, 1
1395         leaf_return
1396
1397 4:      slli    a7, a3, 9               /* a7 = mantissa bits of y */
1398         beqz    a7, 2b                  /* y is an Infinity: ordered */
1399         movi    a2, 1
1400         leaf_return
1401
1402 #endif /* L_cmpsf2 */
1403
1404 #ifdef L_fixsfsi
1405
1406         .align  4
1407         .global __fixsfsi
1408         .type   __fixsfsi, @function
1409 __fixsfsi:     /* converts the float in a2 to a signed 32-bit integer in a2, discarding the fraction bits */
1410         leaf_entry sp, 16
1411
1412         /* Check for NaN and Infinity.  */
1413         movi    a6, 0x7f800000
1414         ball    a2, a6, .Lfixsfsi_nan_or_inf
1415
1416         /* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
1417         extui   a4, a2, 23, 8
1418         addi    a4, a4, -0x7e
1419         bgei    a4, 32, .Lfixsfsi_maxint        /* too large for 32 bits: saturate */
1420         blti    a4, 1, .Lfixsfsi_zero           /* magnitude below 1: result is 0 */
1421
1422         /* Add explicit "1.0" and shift << 8.  */
1423         or      a7, a2, a6              /* a7 keeps the sign in bit 31 for the final select */
1424         slli    a5, a7, 8               /* the explicit 1 is now in bit 31 */
1425
1426         /* Shift back to the right, based on the exponent.  */
1427         ssl     a4              /* shift by 32 - a4 */
1428         srl     a5, a5
1429
1430         /* Negate the result if sign != 0.  */
1431         neg     a2, a5
1432         movgez  a2, a5, a7              /* sign bit clear: use the un-negated value */
1433         leaf_return
1434
1435 .Lfixsfsi_nan_or_inf:
1436         /* Handle Infinity and NaN.  */
1437         slli    a4, a2, 9               /* a4 = mantissa bits */
1438         beqz    a4, .Lfixsfsi_maxint    /* Infinity: saturate according to its sign */
1439
1440         /* Translate NaN to +maxint.  */
1441         movi    a2, 0                   /* makes the sign test below pick the positive limit */
1442
1443 .Lfixsfsi_maxint:
1444         slli    a4, a6, 8       /* 0x80000000 */
1445         addi    a5, a4, -1      /* 0x7fffffff */
1446         movgez  a4, a5, a2              /* non-negative input: 0x7fffffff; negative: 0x80000000 */
1447         mov     a2, a4
1448         leaf_return
1449
1450 .Lfixsfsi_zero:
1451         movi    a2, 0
1452         leaf_return
1453
1454 #endif /* L_fixsfsi */
1455
1456 #ifdef L_fixsfdi
1457
1458         .align  4
1459         .global __fixsfdi
1460         .type   __fixsfdi, @function
1461 __fixsfdi:     /* converts the float in a2 to a signed 64-bit integer in the xh:xl register pair, discarding the fraction bits */
1462         leaf_entry sp, 16
1463
1464         /* Check for NaN and Infinity.  */
1465         movi    a6, 0x7f800000
1466         ball    a2, a6, .Lfixsfdi_nan_or_inf
1467
1468         /* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
1469         extui   a4, a2, 23, 8
1470         addi    a4, a4, -0x7e
1471         bgei    a4, 64, .Lfixsfdi_maxint        /* too large for 64 bits: saturate */
1472         blti    a4, 1, .Lfixsfdi_zero           /* magnitude below 1: result is 0 */
1473
1474         /* Add explicit "1.0" and shift << 8.  */
1475         or      a7, a2, a6              /* a7 keeps the sign; xh may be the same register as a2 */
1476         slli    xh, a7, 8
1477
1478         /* Shift back to the right, based on the exponent.  */
1479         ssl     a4              /* shift by 64 - a4 */
1480         bgei    a4, 32, .Lfixsfdi_smallshift    /* the result reaches into the high word */
1481         srl     xl, xh                  /* everything lands in the low word */
1482         movi    xh, 0
1483
1484 .Lfixsfdi_shifted:
1485         /* Negate the result if sign != 0.  */
1486         bgez    a7, 1f
1487         neg     xl, xl
1488         neg     xh, xh
1489         beqz    xl, 1f                  /* low word zero: no borrow into the high word */
1490         addi    xh, xh, -1
1491 1:      leaf_return
1492
1493 .Lfixsfdi_smallshift:
1494         movi    xl, 0
1495         sll     xl, xh                  /* low word receives the bits shifted out of the high word */
1496         srl     xh, xh
1497         j       .Lfixsfdi_shifted
1498
1499 .Lfixsfdi_nan_or_inf:
1500         /* Handle Infinity and NaN.  */
1501         slli    a4, a2, 9               /* a4 = mantissa bits */
1502         beqz    a4, .Lfixsfdi_maxint    /* Infinity: saturate according to its sign */
1503
1504         /* Translate NaN to +maxint.  */
1505         movi    a2, 0                   /* makes the sign test below pick the positive limit */
1506
1507 .Lfixsfdi_maxint:
1508         slli    a7, a6, 8       /* 0x80000000 */
1509         bgez    a2, 1f
1510         mov     xh, a7                  /* negative: 0x80000000:00000000 */
1511         movi    xl, 0
1512         leaf_return
1513
1514 1:      addi    xh, a7, -1      /* 0x7fffffff */
1515         movi    xl, -1                  /* positive: 0x7fffffff:ffffffff */
1516         leaf_return
1517
1518 .Lfixsfdi_zero:
1519         movi    xh, 0
1520         movi    xl, 0
1521         leaf_return
1522
1523 #endif /* L_fixsfdi */
1524
1525 #ifdef L_fixunssfsi
1526
1527         .align  4
1528         .global __fixunssfsi
1529         .type   __fixunssfsi, @function
1530 __fixunssfsi:  /* converts the float in a2 to a 32-bit unsigned integer in a2, discarding the fraction bits */
1531         leaf_entry sp, 16
1532
1533         /* Check for NaN and Infinity.  */
1534         movi    a6, 0x7f800000
1535         ball    a2, a6, .Lfixunssfsi_nan_or_inf
1536
1537         /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
1538         extui   a4, a2, 23, 8
1539         addi    a4, a4, -0x7f
1540         bgei    a4, 32, .Lfixunssfsi_maxint     /* too large for 32 bits: saturate */
1541         bltz    a4, .Lfixunssfsi_zero           /* magnitude below 1: result is 0 */
1542
1543         /* Add explicit "1.0" and shift << 8.  */
1544         or      a7, a2, a6              /* a7 keeps the sign in bit 31 for the final select */
1545         slli    a5, a7, 8               /* the explicit 1 is now in bit 31 */
1546
1547         /* Shift back to the right, based on the exponent.  */
1548         addi    a4, a4, 1
1549         beqi    a4, 32, .Lfixunssfsi_bigexp     /* a5 is already in position; handled separately */
1550         ssl     a4              /* shift by 32 - a4 */
1551         srl     a5, a5
1552
1553         /* Negate the result if sign != 0.  */
1554         neg     a2, a5
1555         movgez  a2, a5, a7              /* sign bit clear: use the un-negated value */
1556         leaf_return
1557
1558 .Lfixunssfsi_nan_or_inf:
1559         /* Handle Infinity and NaN.  */
1560         slli    a4, a2, 9               /* a4 = mantissa bits */
1561         beqz    a4, .Lfixunssfsi_maxint /* Infinity: saturate according to its sign */
1562
1563         /* Translate NaN to 0xffffffff.  */
1564         movi    a2, -1
1565         leaf_return
1566
1567 .Lfixunssfsi_maxint:
1568         slli    a4, a6, 8       /* 0x80000000 */
1569         movi    a5, -1          /* 0xffffffff */
1570         movgez  a4, a5, a2              /* non-negative input: 0xffffffff; negative: 0x80000000 */
1571         mov     a2, a4
1572         leaf_return
1573
1574 .Lfixunssfsi_zero:
1575         movi    a2, 0
1576         leaf_return
1577
1578 .Lfixunssfsi_bigexp:
1579         /* Handle unsigned maximum exponent case.  */
1580         bltz    a2, 1f
1581         mov     a2, a5          /* no shift needed */
1582         leaf_return
1583
1584         /* Return 0x80000000 if negative.  */
1585 1:      slli    a2, a6, 8
1586         leaf_return
1587
1588 #endif /* L_fixunssfsi */
1589
1590 #ifdef L_fixunssfdi
1591
1592         .align  4
1593         .global __fixunssfdi
1594         .type   __fixunssfdi, @function
1595 __fixunssfdi:  /* converts the float in a2 to a 64-bit unsigned integer in the xh:xl register pair, discarding the fraction bits */
1596         leaf_entry sp, 16
1597
1598         /* Check for NaN and Infinity.  */
1599         movi    a6, 0x7f800000
1600         ball    a2, a6, .Lfixunssfdi_nan_or_inf
1601
1602         /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
1603         extui   a4, a2, 23, 8
1604         addi    a4, a4, -0x7f
1605         bgei    a4, 64, .Lfixunssfdi_maxint     /* too large for 64 bits: saturate */
1606         bltz    a4, .Lfixunssfdi_zero           /* magnitude below 1: result is 0 */
1607
1608         /* Add explicit "1.0" and shift << 8.  */
1609         or      a7, a2, a6              /* a7 keeps the sign; xh may be the same register as a2 */
1610         slli    xh, a7, 8
1611
1612         /* Shift back to the right, based on the exponent.  */
1613         addi    a4, a4, 1
1614         beqi    a4, 64, .Lfixunssfdi_bigexp     /* xh is already in position; handled separately */
1615         ssl     a4              /* shift by 64 - a4 */
1616         bgei    a4, 32, .Lfixunssfdi_smallshift /* the result reaches into the high word */
1617         srl     xl, xh                  /* everything lands in the low word */
1618         movi    xh, 0
1619
1620 .Lfixunssfdi_shifted:
1621         /* Negate the result if sign != 0.  */
1622         bgez    a7, 1f
1623         neg     xl, xl
1624         neg     xh, xh
1625         beqz    xl, 1f                  /* low word zero: no borrow into the high word */
1626         addi    xh, xh, -1
1627 1:      leaf_return
1628
1629 .Lfixunssfdi_smallshift:
1630         movi    xl, 0
1631         src     xl, xh, xl              /* low word receives the bits shifted out of the high word */
1632         srl     xh, xh
1633         j       .Lfixunssfdi_shifted
1634
1635 .Lfixunssfdi_nan_or_inf:
1636         /* Handle Infinity and NaN.  */
1637         slli    a4, a2, 9               /* a4 = mantissa bits */
1638         beqz    a4, .Lfixunssfdi_maxint /* Infinity: saturate according to its sign */
1639
1640         /* Translate NaN to 0xffffffff.... */
1641 1:      movi    xh, -1                  /* also the target of the positive-overflow branch below */
1642         movi    xl, -1
1643         leaf_return
1644
1645 .Lfixunssfdi_maxint:
1646         bgez    a2, 1b                  /* non-negative input: all ones */
1647 2:      slli    xh, a6, 8       /* 0x80000000 */
1648         movi    xl, 0                   /* negative input: 0x80000000:00000000 */
1649         leaf_return
1650
1651 .Lfixunssfdi_zero:
1652         movi    xh, 0
1653         movi    xl, 0
1654         leaf_return
1655
1656 .Lfixunssfdi_bigexp:
1657         /* Handle unsigned maximum exponent case.  */
1658         bltz    a7, 2b                  /* sign is tested in a7 because xh has been overwritten and may alias a2 */
1659         movi    xl, 0
1660         leaf_return             /* no shift needed */
1661
1662 #endif /* L_fixunssfdi */
1663
1664 #ifdef L_floatsisf
1665
1666         .align  4
1667         .global __floatunsisf
1668         .type   __floatunsisf, @function
1669 __floatunsisf: /* converts the 32-bit unsigned integer in a2 to a float in a2, sharing the __floatsisf code */
1670         leaf_entry sp, 16
1671         beqz    a2, .Lfloatsisf_return  /* zero is returned unchanged */
1672
1673         /* Set the sign to zero and jump to the floatsisf code.  */
1674         movi    a7, 0
1675         j       .Lfloatsisf_normalize
1676
1677         .align  4
1678         .global __floatsisf
1679         .type   __floatsisf, @function
1680 __floatsisf:   /* converts the signed 32-bit integer in a2 to a float in a2, rounding to nearest with ties to even */
1681         leaf_entry sp, 16
1682
1683         /* Check for zero.  */
1684         beqz    a2, .Lfloatsisf_return  /* zero is returned unchanged */
1685
1686         /* Save the sign.  */
1687         extui   a7, a2, 31, 1           /* a7 = 1 for negative input, 0 otherwise */
1688
1689         /* Get the absolute value.  */
1690 #if XCHAL_HAVE_ABS
1691         abs     a2, a2
1692 #else
1693         neg     a4, a2
1694         movltz  a2, a4, a2              /* replace a2 with its negation only when it is negative */
1695 #endif
1696
1697 .Lfloatsisf_normalize:
1698         /* Normalize with the first 1 bit in the msb.  */
1699         do_nsau a4, a2, a5, a6          /* presumably a4 = number of leading zero bits in a2, with a5/a6 as scratch (do_nsau is defined outside this chunk) */
1700         ssl     a4
1701         sll     a5, a2                  /* a5 = a2 << a4 */
1702
1703         /* Shift the mantissa into position, with rounding bits in a6.  */
1704         srli    a2, a5, 8               /* upper 24 bits become the mantissa */
1705         slli    a6, a5, (32 - 8)        /* the 8 discarded bits, moved to the top of a6 */
1706
1707         /* Set the exponent.  */
1708         movi    a5, 0x9d        /* 0x7e + 31 */
1709         sub     a5, a5, a4
1710         slli    a5, a5, 23
1711         add     a2, a2, a5              /* add, not or: the mantissa's top bit carries into the exponent field */
1712
1713         /* Add the sign.  */
1714         slli    a7, a7, 31
1715         or      a2, a2, a7
1716
1717         /* Round up if the leftover fraction is >= 1/2.  */
1718         bgez    a6, .Lfloatsisf_return  /* top discarded bit clear: below 1/2, no rounding */
1719         addi    a2, a2, 1       /* Overflow to the exponent is OK.  */
1720
1721         /* Check if the leftover fraction is exactly 1/2.  */
1722         slli    a6, a6, 1
1723         beqz    a6, .Lfloatsisf_exactlyhalf
1724
1725 .Lfloatsisf_return:
1726         leaf_return
1727
1728 .Lfloatsisf_exactlyhalf:
1729         /* Round down to the nearest even value.  */
1730         srli    a2, a2, 1
1731         slli    a2, a2, 1               /* clear bit 0 */
1732         leaf_return
1733
1734 #endif /* L_floatsisf */
1735
1736 #ifdef L_floatdisf
1737
1738         .align  4
1739         .global __floatundisf
1740         .type   __floatundisf, @function
1741 __floatundisf: /* converts the 64-bit unsigned integer in xh:xl to a float in a2, sharing the __floatdisf code */
1742         leaf_entry sp, 16
1743
1744         /* Check for zero.  */
1745         or      a4, xh, xl
1746         beqz    a4, 2f                  /* both words zero: return straight away */
1747
1748         /* Set the sign to zero and jump to the floatdisf code.  */
1749         movi    a7, 0
1750         j       .Lfloatdisf_normalize
1751
1752         .align  4
1753         .global __floatdisf
1754         .type   __floatdisf, @function
1755 __floatdisf:   /* converts the signed 64-bit integer in xh:xl to a float in a2, rounding to nearest with ties to even */
1756         leaf_entry sp, 16
1757
1758         /* Check for zero.  */
1759         or      a4, xh, xl
1760         beqz    a4, 2f                  /* both words zero: return straight away */
1761
1762         /* Save the sign.  */
1763         extui   a7, xh, 31, 1           /* a7 = 1 for negative input, 0 otherwise */
1764
1765         /* Get the absolute value.  */
1766         bgez    xh, .Lfloatdisf_normalize
1767         neg     xl, xl
1768         neg     xh, xh
1769         beqz    xl, .Lfloatdisf_normalize       /* low word zero: no borrow into the high word */
1770         addi    xh, xh, -1
1771
1772 .Lfloatdisf_normalize:
1773         /* Normalize with the first 1 bit in the msb of xh.  */
1774         beqz    xh, .Lfloatdisf_bigshift
1775         do_nsau a4, xh, a5, a6          /* presumably a4 = number of leading zero bits in xh, with a5/a6 as scratch (do_nsau is defined outside this chunk) */
1776         ssl     a4
1777         src     xh, xh, xl              /* 64-bit left shift: xh takes bits in from the top of xl */
1778         sll     xl, xl
1779
1780 .Lfloatdisf_shifted:
1781         /* Shift the mantissa into position, with rounding bits in a6.  */
1782         ssai    8
1783         sll     a5, xl                  /* a5 = the low bits of xl that will not fit in a6 */
1784         src     a6, xh, xl              /* a6 = the 32 bits just below the 24-bit mantissa */
1785         srl     xh, xh                  /* xh = 24-bit mantissa */
1786         beqz    a5, 1f
1787         movi    a5, 1
1788         or      a6, a6, a5              /* record the lost nonzero bits in bit 0 of a6 so a tie is not mis-detected */
1789 1:
1790         /* Set the exponent.  */
1791         movi    a5, 0xbd        /* 0x7e + 63 */
1792         sub     a5, a5, a4
1793         slli    a5, a5, 23
1794         add     a2, xh, a5              /* add, not or: the mantissa's top bit carries into the exponent field */
1795
1796         /* Add the sign.  */
1797         slli    a7, a7, 31
1798         or      a2, a2, a7
1799
1800         /* Round up if the leftover fraction is >= 1/2.  */
1801         bgez    a6, 2f                  /* top rounding bit clear: below 1/2, no rounding */
1802         addi    a2, a2, 1       /* Overflow to the exponent is OK.  */
1803
1804         /* Check if the leftover fraction is exactly 1/2.  */
1805         slli    a6, a6, 1
1806         beqz    a6, .Lfloatdisf_exactlyhalf
1807 2:      leaf_return
1808
1809 .Lfloatdisf_bigshift:
1810         /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
1811         do_nsau a4, xl, a5, a6
1812         ssl     a4
1813         sll     xh, xl
1814         movi    xl, 0
1815         addi    a4, a4, 32              /* account for the 32 zero bits of the original high word */
1816         j       .Lfloatdisf_shifted
1817
1818 .Lfloatdisf_exactlyhalf:
1819         /* Round down to the nearest even value.  */
1820         srli    a2, a2, 1
1821         slli    a2, a2, 1               /* clear bit 0 */
1822         leaf_return
1823
1824 #endif /* L_floatdisf */
1825
1826 #if XCHAL_HAVE_FP_SQRT
1827 #ifdef L_sqrtf
1828         /* Square root: takes the float bits in a2 and returns the result bits in a2, using the FP unit's square-root helper instructions.  */
1829
1830         .align  4
1831         .global __ieee754_sqrtf
1832         .type   __ieee754_sqrtf, @function
1833 __ieee754_sqrtf:
1834         leaf_entry      sp, 16
1835
1836         wfr             f1, a2          /* operand moved from a2 into f1 */
1837
1838         sqrt0.s         f2, f1          /* f2 derived from the operand; presumably the initial estimate -- confirm against the ISA manual */
1839         const.s         f3, 0
1840         maddn.s         f3, f2, f2
1841         nexp01.s        f4, f1          /* f4 derived from the operand; presumably an exponent-normalized copy -- confirm */
1842         const.s         f0, 3
1843         addexp.s        f4, f0
1844         maddn.s         f0, f3, f4
1845         nexp01.s        f3, f1
1846         neg.s           f5, f3
1847         maddn.s         f2, f0, f2      /* NOTE(review): the maddn.s chain looks like iterative refinement of the estimate -- confirm */
1848         const.s         f0, 0
1849         const.s         f6, 0
1850         const.s         f7, 0
1851         maddn.s         f0, f5, f2
1852         maddn.s         f6, f2, f4
1853         const.s         f4, 3
1854         maddn.s         f7, f4, f2
1855         maddn.s         f3, f0, f0
1856         maddn.s         f4, f6, f2
1857         neg.s           f2, f7
1858         maddn.s         f0, f3, f2
1859         maddn.s         f7, f4, f7
1860         mksadj.s        f2, f1          /* f2 derived from the operand; presumably the exponent adjustment applied below -- confirm */
1861         nexp01.s        f1, f1
1862         maddn.s         f1, f0, f0
1863         neg.s           f3, f7
1864         addexpm.s       f0, f2          /* f0 and f3 are both adjusted using f2 before the final step */
1865         addexp.s        f3, f2
1866         divn.s          f0, f1, f3      /* final step; f0 is the value returned */
1867
1868         rfr             a2, f0          /* move the result back to the integer return register */
1869
1870         leaf_return
1871
1872 #endif /* L_sqrtf */
1873 #endif /* XCHAL_HAVE_FP_SQRT */
1874
1875 #if XCHAL_HAVE_FP_RECIP
1876 #ifdef L_recipsf2
1877         /* Reciprocal: takes the float bits in a2 and returns the result bits in a2.  */
1878
1879         .align  4
1880         .global __recipsf2
1881         .type   __recipsf2, @function
1882 __recipsf2:
1883         leaf_entry      sp, 16
1884
1885         wfr             f1, a2          /* operand moved from a2 into f1 */
1886
1887         recip0.s        f0, f1          /* f0 derived from the operand; presumably the initial reciprocal estimate -- confirm against the ISA manual */
1888         const.s         f2, 1
1889         msub.s          f2, f1, f0      /* presumably f2 = 1 - f1 * f0, the error of the estimate -- confirm */
1890         maddn.s         f0, f0, f2      /* presumably f0 += f0 * f2, the corrected estimate -- confirm */
1891         const.s         f2, 1
1892         msub.s          f2, f1, f0      /* the same two-instruction step, repeated once */
1893         maddn.s         f0, f0, f2
1894
1895         rfr             a2, f0          /* move the result back to the integer return register */
1896
1897         leaf_return
1898
1899 #endif /* L_recipsf2 */
1900 #endif /* XCHAL_HAVE_FP_RECIP */
1901
1902 #if XCHAL_HAVE_FP_RSQRT
1903 #ifdef L_rsqrtsf2
1904         /* Reciprocal square root: takes the float bits in a2 and returns the result bits in a2.  */
1905
1906         .align  4
1907         .global __rsqrtsf2
1908         .type   __rsqrtsf2, @function
1909 __rsqrtsf2:
1910         leaf_entry      sp, 16
1911
1912         wfr             f1, a2          /* operand moved from a2 into f1 */
1913
1914         rsqrt0.s        f0, f1          /* f0 derived from the operand; presumably the initial estimate -- confirm against the ISA manual */
1915         mul.s           f2, f1, f0      /* f2 = operand * estimate */
1916         const.s         f3, 3;
1917         mul.s           f4, f3, f0      /* f4 = the const.s 3 value * estimate; NOTE(review): confirm what value const.s 3 encodes */
1918         const.s         f5, 1
1919         msub.s          f5, f2, f0      /* presumably f5 = 1 - f2 * f0, the error of the estimate -- confirm */
1920         maddn.s         f0, f4, f5      /* presumably f0 += f4 * f5, the corrected estimate -- confirm */
1921         mul.s           f2, f1, f0      /* the same step repeated with the improved estimate */
1922         mul.s           f1, f3, f0      /* f1 is reused as a temporary; the operand is no longer needed */
1923         const.s         f3, 1
1924         msub.s          f3, f2, f0
1925         maddn.s         f0, f1, f3
1926
1927         rfr             a2, f0          /* move the result back to the integer return register */
1928
1929         leaf_return
1930
1931 #endif /* L_rsqrtsf2 */
1932 #endif /* XCHAL_HAVE_FP_RSQRT */