libgcc/config/xtensa/ieee754-df.S

   1 /* IEEE-754 double-precision functions for Xtensa
   2    Copyright (C) 2006-2018 Free Software Foundation, Inc.
   3    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but WITHOUT
  13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15    License for more details.
  16
  17    Under Section 7 of GPL version 3, you are granted additional
  18    permissions described in the GCC Runtime Library Exception, version
  19    3.1, as published by the Free Software Foundation.
  20
  21    You should have received a copy of the GNU General Public License and
  22    a copy of the GCC Runtime Library Exception along with this program;
  23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  24    <http://www.gnu.org/licenses/>.  */
  25
  26 #ifdef __XTENSA_EB__
  27 #define xh a2
  28 #define xl a3
  29 #define yh a4
  30 #define yl a5
  31 #else
  32 #define xh a3
  33 #define xl a2
  34 #define yh a5
  35 #define yl a4
  36 #endif
  37
  38 /*  Warning!  The branch displacements for some Xtensa branch instructions
  39     are quite small, and this code has been carefully laid out to keep
  40     branch targets in range.  If you change anything, be sure to check that
  41     the assembler is not relaxing anything to branch over a jump.  */
  42
  43 #ifdef L_negdf2
  44
  45         .align  4
  46         .global __negdf2
  47         .type   __negdf2, @function
  48 __negdf2:                  /* Negate the double held in xh/xl by flipping its sign bit.  */
  49         leaf_entry sp, 16
  50         movi    a4, 0x80000000  /* a4 = mask with only bit 31 set */
  51         xor     xh, xh, a4      /* toggle bit 31 of the high word; xl is left untouched */
  52         leaf_return
  53
  54 #endif /* L_negdf2 */
  55
  56 #ifdef L_addsubdf3
  57
  58         /* Addition */
  59 __adddf3_aux:
  60
  61         /* Handle NaNs and Infinities.  (This code is placed before the
  62            start of the function just to keep it in range of the limited
  63            branch displacements.)  */
  64
  65 .Ladd_xnan_or_inf:         /* x has an all-ones exponent field (tested against a6 = 0x7ff00000).  */
  66         /* If y is neither Infinity nor NaN, return x.  */
  67         bnall   yh, a6, 1f
  68         /* If x is a NaN, return it.  Otherwise, return y.  */
  69         slli    a7, xh, 12      /* drop sign and exponent, keeping the high mantissa bits */
  70         or      a7, a7, xl      /* a7 == 0 only when the whole mantissa is zero (x is Infinity) */
  71         beqz    a7, .Ladd_ynan_or_inf
  72 1:      leaf_return             /* NOTE(review): x is returned as-is; a NaN is not quieted here -- confirm intended */
  73
  74 .Ladd_ynan_or_inf:
  75         /* Return y (copied unchanged, whether it is a NaN or an Infinity).  */
  76         mov     xh, yh
  77         mov     xl, yl
  78         leaf_return
  79
  80 .Ladd_opposite_signs:
  81         /* Operand signs differ.  Do a subtraction.  */
  82         slli    a7, a6, 11      /* 0x7ff00000 << 11 = 0x80000000, the sign-bit mask */
  83         xor     yh, yh, a7      /* flip y's sign so both operands now have the same sign */
  84         j       .Lsub_same_sign
  85
  86         .align  4
  87         .global __adddf3
  88         .type   __adddf3, @function
  89 __adddf3:                  /* Add the doubles in xh/xl and yh/yl; the result is left in xh/xl.  */
  90         leaf_entry sp, 16
  91         movi    a6, 0x7ff00000  /* a6 = mask of the 11 exponent bits of a high word */
  92
  93         /* Check if the two operands have the same sign.  */
  94         xor     a7, xh, yh      /* bit 31 of a7 is set only when the sign bits differ */
  95         bltz    a7, .Ladd_opposite_signs
  96
  97 .Ladd_same_sign:           /* Also entered from .Lsub_opposite_signs after y's sign is flipped.  */
  98         /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
  99         ball    xh, a6, .Ladd_xnan_or_inf
 100         ball    yh, a6, .Ladd_ynan_or_inf
 101
 102         /* Compare the exponents.  The smaller operand will be shifted
 103            right by the exponent difference and added to the larger
 104            one.  */
 105         extui   a7, xh, 20, 12  /* a7 = sign + exponent of x (12 bits); the signs match here */
 106         extui   a8, yh, 20, 12  /* a8 = sign + exponent of y */
 107         bltu    a7, a8, .Ladd_shiftx
 108
 109 .Ladd_shifty:
 110         /* Check if the smaller (or equal) exponent is zero.  */
 111         bnone   yh, a6, .Ladd_yexpzero
 112
 113         /* Replace yh sign/exponent with 0x001.  */
 114         or      yh, yh, a6      /* set every exponent bit so that bit 20 is known to be 1 */
 115         slli    yh, yh, 11      /* shift out the sign and the upper 10 exponent bits ... */
 116         srli    yh, yh, 11      /* ... leaving bit 20 as the explicit "1.0" above the mantissa */
 117
 118 .Ladd_yexpdiff:
 119         /* Compute the exponent difference.  Optimize for difference < 32.  */
 120         sub     a10, a7, a8
 121         bgeui   a10, 32, .Ladd_bigshifty
 122
 123         /* Shift yh/yl right by the exponent difference.  Any bits that are
 124            shifted out of yl are saved in a9 for rounding the result.  */
 125         ssr     a10
 126         movi    a9, 0
 127         src     a9, yl, a9      /* a9 = bits shifted out of yl, left-justified */
 128         src     yl, yh, yl
 129         srl     yh, yh
 130
 131 .Ladd_addy:
 132         /* Do the 64-bit addition.  */
 133         add     xl, xl, yl
 134         add     xh, xh, yh
 135         bgeu    xl, yl, 1f      /* sum >= addend (unsigned) means no carry out of the low word */
 136         addi    xh, xh, 1       /* propagate the carry into the high word */
 137 1:
 138         /* Check if the add overflowed into the exponent.  */
 139         extui   a10, xh, 20, 12
 140         beq     a10, a7, .Ladd_round    /* sign/exponent field unchanged: only rounding is left */
 141         mov     a8, a7          /* .Ladd_carry expects the original exponent in a8 */
 142         j       .Ladd_carry
 143
 144 .Ladd_yexpzero:
 145         /* y is a subnormal value (or zero).  Replace its sign/exponent with zero,
 146            i.e., no implicit "1.0", and increment the apparent exponent
 147            because subnormals behave as if they had the minimum (nonzero)
 148            exponent.  Test for the case when both exponents are zero.  */
 149         slli    yh, yh, 12      /* clear the top 12 bits of yh (sign and exponent) */
 150         srli    yh, yh, 12
 151         bnone   xh, a6, .Ladd_bothexpzero
 152         addi    a8, a8, 1
 153         j       .Ladd_yexpdiff
 154
 155 .Ladd_bothexpzero:
 156         /* Both exponents are zero.  Handle this as a special case.  There
 157            is no need to shift or round, and the normal code for handling
 158            a carry into the exponent field will not work because it
 159            assumes there is an implicit "1.0" that needs to be added.  */
 160         add     xl, xl, yl
 161         add     xh, xh, yh      /* xh still carries x's sign; y's sign was cleared above */
 162         bgeu    xl, yl, 1f      /* no carry out of the low word */
 163         addi    xh, xh, 1
 164 1:      leaf_return             /* also the "return x" target of the 1b branch in .Ladd_bigshifty */
 165
 166 .Ladd_bigshifty:
 167         /* Exponent difference >= 64 -- just return the bigger value (x).  */
 168         bgeui   a10, 64, 1b
 169
 170         /* Shift yh/yl right by the exponent difference.  Any bits that are
 171            shifted out are saved in a9 for rounding the result.  */
 172         ssr     a10             /* a10 is 32..63 here; only its low 5 bits reach SAR (difference - 32) */
 173         sll     a11, yl         /* lost bits shifted out of yl */
 174         src     a9, yh, yl      /* a9 = rounding word taken from yh:yl */
 175         srl     yl, yh          /* what remains of yh becomes the new low word */
 176         movi    yh, 0           /* the high word is shifted out entirely */
 177         beqz    a11, .Ladd_addy /* nothing was lost below a9, so a9 is exact */
 178         or      a9, a9, a10     /* any positive, nonzero value will work */
 179         j       .Ladd_addy
 180
 181 .Ladd_xexpzero:
 182         /* Same as "yexpzero" except skip handling the case when both
 183            exponents are zero.  */
 184         slli    xh, xh, 12      /* clear the top 12 bits of xh (sign and exponent) */
 185         srli    xh, xh, 12
 186         addi    a7, a7, 1       /* a subnormal x is shifted as if it had exponent 1 */
 187         j       .Ladd_xexpdiff
 188
 189 .Ladd_shiftx:
 190         /* Same thing as the "shifty" code, but with x and y swapped.  Also,
 191            because the exponent difference is always nonzero in this version,
 192            the shift sequence can use SLL and skip loading a constant zero.  */
 193         bnone   xh, a6, .Ladd_xexpzero
 194
 195         or      xh, xh, a6      /* replace xh sign/exponent with 0x001, as done for yh in "shifty" */
 196         slli    xh, xh, 11
 197         srli    xh, xh, 11
 198
 199 .Ladd_xexpdiff:
 200         sub     a10, a8, a7     /* a10 = number of bits x must be shifted right */
 201         bgeui   a10, 32, .Ladd_bigshiftx
 202
 203         ssr     a10
 204         sll     a9, xl          /* a9 = bits shifted out of xl, kept for rounding */
 205         src     xl, xh, xl
 206         srl     xh, xh
 207
 208 .Ladd_addx:
 209         add     xl, xl, yl
 210         add     xh, xh, yh      /* yh was not stripped, so the sum carries y's sign and exponent */
 211         bgeu    xl, yl, 1f      /* no carry out of the low word */
 212         addi    xh, xh, 1
 213 1:
 214         /* Check if the add overflowed into the exponent.  */
 215         extui   a10, xh, 20, 12
 216         bne     a10, a8, .Ladd_carry    /* a8 = y's original sign/exponent field */
 217
 218 .Ladd_round:
 219         /* Round up if the leftover fraction is >= 1/2.  */
 220         bgez    a9, 1f          /* msb of a9 clear: leftover is below 1/2, nothing to do */
 221         addi    xl, xl, 1
 222         beqz    xl, .Ladd_roundcarry    /* low word wrapped to zero: carry into xh */
 223
 224         /* Check if the leftover fraction is exactly 1/2.  */
 225         slli    a9, a9, 1       /* discard the 1/2 bit; zero now means an exact tie */
 226         beqz    a9, .Ladd_exactlyhalf
 227 1:      leaf_return
 228
 229 .Ladd_bigshiftx:
 230         /* Mostly the same thing as "bigshifty", with x as the shifted operand.  */
 231         bgeui   a10, 64, .Ladd_returny  /* x is shifted out completely: the sum is just y */
 232
 233         ssr     a10             /* a10 is 32..63 here; only its low 5 bits reach SAR */
 234         sll     a11, xl         /* a11 = bits of xl lost below the rounding word */
 235         src     a9, xh, xl      /* a9 = rounding word taken from xh:xl */
 236         srl     xl, xh          /* what remains of xh becomes the new low word */
 237         movi    xh, 0
 238         beqz    a11, .Ladd_addx /* nothing was lost below a9, so a9 is exact */
 239         or      a9, a9, a10     /* make a9 nonzero so the lost bits still count when rounding */
 240         j       .Ladd_addx
 241
 242 .Ladd_returny:             /* Return y unchanged.  */
 243         mov     xh, yh
 244         mov     xl, yl
 245         leaf_return
 246
 247 .Ladd_carry:
 248         /* The addition has overflowed into the exponent field, so the
 249            value needs to be renormalized.  The mantissa of the result
 250            can be recovered by subtracting the original exponent and
 251            adding 0x100000 (which is the explicit "1.0" for the
 252            mantissa of the non-shifted operand -- the "1.0" for the
 253            shifted operand was already added).  The mantissa can then
 254            be shifted right by one bit.  The explicit "1.0" of the
 255            shifted mantissa then needs to be replaced by the exponent,
 256            incremented by one to account for the normalizing shift.
 257            It is faster to combine these operations: do the shift first
 258            and combine the additions and subtractions.  If x is the
 259            original exponent, the result is:
 260                shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
 261            or:
 262                shifted mantissa + ((x + 1) << 19)
 263            Note that the exponent is incremented here by leaving the
 264            explicit "1.0" of the mantissa in the exponent field.  */
 265
 266         /* Shift xh/xl right by one bit.  Save the lsb of xl.  */
 267         mov     a10, xl         /* bit 0 of a10 is the bit about to be shifted out */
 268         ssai    1
 269         src     xl, xh, xl
 270         srl     xh, xh
 271
 272         /* See explanation above.  The original exponent is in a8.  */
 273         addi    a8, a8, 1
 274         slli    a8, a8, 19      /* a8 = (x + 1) << 19, using the 12-bit sign/exponent field as x */
 275         add     xh, xh, a8
 276
 277         /* Return an Infinity if the exponent overflowed.  */
 278         ball    xh, a6, .Ladd_infinity
 279
 280         /* Same thing as the "round" code except the msb of the leftover
 281            fraction is bit 0 of a10, with the rest of the fraction in a9.  */
 282         bbci.l  a10, 0, 1f      /* shifted-out bit clear: leftover is below 1/2 */
 283         addi    xl, xl, 1
 284         beqz    xl, .Ladd_roundcarry
 285         beqz    a9, .Ladd_exactlyhalf   /* nothing below the 1/2 bit: exact tie */
 286 1:      leaf_return
 287
 288 .Ladd_infinity:
 289         /* Clear the mantissa.  */
 290         movi    xl, 0
 291         srli    xh, xh, 20      /* keep only the sign/exponent field of xh */
 292         slli    xh, xh, 20
 293
 294         /* The sign bit may have been lost in a carry-out.  Put it back.  */
 295         slli    a8, a8, 1       /* a8 was shifted left by 19 above; one more bit puts its sign at bit 31 */
 296         or      xh, xh, a8
 297         leaf_return
 298
 299 .Ladd_exactlyhalf:
 300         /* Round down to the nearest even value (xl has already been incremented).  */
 301         srli    xl, xl, 1       /* clear bit 0 of xl */
 302         slli    xl, xl, 1
 303         leaf_return
 304
 305 .Ladd_roundcarry:
 306         /* xl is always zero when the rounding increment overflows, so
 307            there's no need to round it to an even value.  */
 308         addi    xh, xh, 1       /* carry from the low word into the high word */
 309         /* Overflow to the exponent is OK.  */
 310         leaf_return
 311
 312
 313         /* Subtraction */
 314 __subdf3_aux:
 315
 316         /* Handle NaNs and Infinities.  (This code is placed before the
 317            start of the function just to keep it in range of the limited
 318            branch displacements.)  */
 319
 320 .Lsub_xnan_or_inf:         /* x has an all-ones exponent field (tested against a6 = 0x7ff00000).  */
 321         /* If y is neither Infinity nor NaN, return x.  */
 322         bnall   yh, a6, 1f
 323         /* Both x and y are either NaN or Inf, so the result is NaN.  */
 324         movi    a4, 0x80000     /* make it a quiet NaN; a4 aliases a y register, which is dead here */
 325         or      xh, xh, a4      /* set bit 19 of the high word */
 326 1:      leaf_return             /* NOTE(review): via 1f, x is returned as-is; a NaN x is not quieted -- confirm intended */
 327
 328 .Lsub_ynan_or_inf:
 329         /* Negate y and return it.  */
 330         slli    a7, a6, 11      /* 0x7ff00000 << 11 = 0x80000000, the sign-bit mask */
 331         xor     xh, yh, a7      /* NOTE(review): no quiet bit is set on this path if y is a NaN -- confirm intended */
 332         mov     xl, yl
 333         leaf_return
 334
 335 .Lsub_opposite_signs:
 336         /* Operand signs differ.  Do an addition.  */
 337         slli    a7, a6, 11      /* 0x7ff00000 << 11 = 0x80000000, the sign-bit mask */
 338         xor     yh, yh, a7      /* flip y's sign so both operands now have the same sign */
 339         j       .Ladd_same_sign
 340
 341         .align  4
 342         .global __subdf3
 343         .type   __subdf3, @function
 344 __subdf3:                  /* Subtract the double in yh/yl from the one in xh/xl; the result is left in xh/xl.  */
 345         leaf_entry sp, 16
 346         movi    a6, 0x7ff00000  /* a6 = mask of the 11 exponent bits of a high word */
 347
 348         /* Check if the two operands have the same sign.  */
 349         xor     a7, xh, yh      /* bit 31 of a7 is set only when the sign bits differ */
 350         bltz    a7, .Lsub_opposite_signs
 351
 352 .Lsub_same_sign:           /* Also entered from .Ladd_opposite_signs after y's sign is flipped.  */
 353         /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
 354         ball    xh, a6, .Lsub_xnan_or_inf
 355         ball    yh, a6, .Lsub_ynan_or_inf
 356
 357         /* Compare the operands.  In contrast to addition, the entire
 358            value matters here.  */
 359         extui   a7, xh, 20, 11  /* a7 = exponent of x (11 bits, sign excluded) */
 360         extui   a8, yh, 20, 11  /* a8 = exponent of y */
 361         bltu    xh, yh, .Lsub_xsmaller  /* signs are equal, so an unsigned compare orders the magnitudes */
 362         beq     xh, yh, .Lsub_compare_low
 363
 364 .Lsub_ysmaller:
 365         /* Check if the smaller (or equal) exponent is zero.  */
 366         bnone   yh, a6, .Lsub_yexpzero
 367
 368         /* Replace yh sign/exponent with 0x001.  */
 369         or      yh, yh, a6      /* set every exponent bit so that bit 20 is known to be 1 */
 370         slli    yh, yh, 11      /* shift out the sign and the upper 10 exponent bits ... */
 371         srli    yh, yh, 11      /* ... leaving bit 20 as the explicit "1.0" above the mantissa */
 372
 373 .Lsub_yexpdiff:
 374         /* Compute the exponent difference.  Optimize for difference < 32.  */
 375         sub     a10, a7, a8
 376         bgeui   a10, 32, .Lsub_bigshifty
 377
 378         /* Shift yh/yl right by the exponent difference.  Any bits that are
 379            shifted out of yl are saved in a9 for rounding the result.  */
 380         ssr     a10
 381         movi    a9, 0
 382         src     a9, yl, a9      /* a9 = bits shifted out of yl, left-justified */
 383         src     yl, yh, yl
 384         srl     yh, yh
 385
 386 .Lsub_suby:
 387         /* Do the 64-bit subtraction.  */
 388         sub     xh, xh, yh
 389         bgeu    xl, yl, 1f      /* xl >= yl: the low-word subtract will not borrow */
 390         addi    xh, xh, -1      /* take the borrow out of the high word */
 391 1:      sub     xl, xl, yl
 392
 393         /* Subtract the leftover bits in a9 from zero and propagate any
 394            borrow from xh/xl.  */
 395         neg     a9, a9
 396         beqz    a9, 1f          /* no leftover bits, hence no borrow */
 397         addi    a5, xh, -1      /* a5 is scratch here: it aliases a y register that has been consumed */
 398         moveqz  xh, a5, xl      /* the decrement below borrows from xh only when xl == 0 */
 399         addi    xl, xl, -1
 400 1:
 401         /* Check if the subtract underflowed into the exponent.  */
 402         extui   a10, xh, 20, 11
 403         beq     a10, a7, .Lsub_round    /* exponent field still equals x's original exponent */
 404         j       .Lsub_borrow
 405
 406 .Lsub_compare_low:
 407         /* The high words are equal.  Compare the low words.  */
 408         bltu    xl, yl, .Lsub_xsmaller
 409         bltu    yl, xl, .Lsub_ysmaller
 410         /* The operands are equal.  Return 0.0.  */
 411         movi    xh, 0           /* both words cleared: the result is +0.0 whatever the operands' sign */
 412         movi    xl, 0
 413 1:      leaf_return             /* also the "return x" target of the 1b branch in .Lsub_bigshifty */
 414
 415 .Lsub_yexpzero:
 416         /* y is a subnormal value (or zero).  Replace its sign/exponent with zero,
 417            i.e., no implicit "1.0".  Unless x is also a subnormal, increment
 418            y's apparent exponent because subnormals behave as if they had
 419            the minimum (nonzero) exponent.  */
 420         slli    yh, yh, 12      /* clear the top 12 bits of yh (sign and exponent) */
 421         srli    yh, yh, 12
 422         bnone   xh, a6, .Lsub_yexpdiff  /* x's exponent is zero too: leave a8 alone (difference is 0) */
 423         addi    a8, a8, 1
 424         j       .Lsub_yexpdiff
 425
 426 .Lsub_bigshifty:
 427         /* Exponent difference >= 64 -- just return the bigger value (x).  */
 428         bgeui   a10, 64, 1b
 429
 430         /* Shift yh/yl right by the exponent difference.  Any bits that are
 431            shifted out are saved in a9 for rounding the result.  */
 432         ssr     a10             /* a10 is 32..63 here; only its low 5 bits reach SAR (difference - 32) */
 433         sll     a11, yl         /* lost bits shifted out of yl */
 434         src     a9, yh, yl      /* a9 = rounding word taken from yh:yl */
 435         srl     yl, yh          /* what remains of yh becomes the new low word */
 436         movi    yh, 0           /* the high word is shifted out entirely */
 437         beqz    a11, .Lsub_suby /* nothing was lost below a9, so a9 is exact */
 438         or      a9, a9, a10     /* any positive, nonzero value will work */
 439         j       .Lsub_suby
 440
 441 .Lsub_xsmaller:
 442         /* Same thing as the "ysmaller" code, but with x and y swapped and
 443            with y negated.  */
 444         bnone   xh, a6, .Lsub_xexpzero
 445
 446         or      xh, xh, a6      /* replace xh sign/exponent with 0x001, as done for yh in "ysmaller" */
 447         slli    xh, xh, 11
 448         srli    xh, xh, 11
 449
 450 .Lsub_xexpdiff:
 451         sub     a10, a8, a7     /* a10 = number of bits x must be shifted right */
 452         bgeui   a10, 32, .Lsub_bigshiftx
 453
 454         ssr     a10
 455         movi    a9, 0
 456         src     a9, xl, a9      /* a9 = bits shifted out of xl, left-justified */
 457         src     xl, xh, xl
 458         srl     xh, xh
 459
 460         /* Negate y.  */
 461         slli    a11, a6, 11     /* a11 = 0x80000000, the sign-bit mask */
 462         xor     yh, yh, a11
 463
 464 .Lsub_subx:
 465         sub     xl, yl, xl
 466         sub     xh, yh, xh      /* yh keeps its (negated) sign and exponent, so the result inherits them */
 467         bgeu    yl, xl, 1f      /* yl >= new xl: the low-word subtract did not borrow */
 468         addi    xh, xh, -1
 469 1:
 470         /* Subtract the leftover bits in a9 from zero and propagate any
 471            borrow from xh/xl.  */
 472         neg     a9, a9
 473         beqz    a9, 1f          /* no leftover bits, hence no borrow */
 474         addi    a5, xh, -1      /* a5 is scratch here: it aliases a y register that has been consumed */
 475         moveqz  xh, a5, xl      /* the decrement below borrows from xh only when xl == 0 */
 476         addi    xl, xl, -1
 477 1:
 478         /* Check if the subtract underflowed into the exponent.  */
 479         extui   a10, xh, 20, 11
 480         bne     a10, a8, .Lsub_borrow   /* a8 = y's original exponent */
 481
 482 .Lsub_round:
 483         /* Round up if the leftover fraction is >= 1/2.  */
 484         bgez    a9, 1f          /* msb of a9 clear: leftover is below 1/2, nothing to do */
 485         addi    xl, xl, 1
 486         beqz    xl, .Lsub_roundcarry    /* low word wrapped to zero: carry into xh */
 487
 488         /* Check if the leftover fraction is exactly 1/2.  */
 489         slli    a9, a9, 1       /* discard the 1/2 bit; zero now means an exact tie */
 490         beqz    a9, .Lsub_exactlyhalf
 491 1:      leaf_return
 492
 493 .Lsub_xexpzero:
 494         /* Same as "yexpzero", with the roles of x and y swapped.  */
 495         slli    xh, xh, 12      /* clear the top 12 bits of xh (sign and exponent) */
 496         srli    xh, xh, 12
 497         bnone   yh, a6, .Lsub_xexpdiff  /* y's exponent is zero too: leave a7 alone (difference is 0) */
 498         addi    a7, a7, 1
 499         j       .Lsub_xexpdiff
 500
 501 .Lsub_bigshiftx:
 502         /* Mostly the same thing as "bigshifty", but with the sign bit of the
 503            shifted value set so that the subsequent subtraction flips the
 504            sign of y.  */
 505         bgeui   a10, 64, .Lsub_returny  /* x is shifted out completely: the result is just -y */
 506
 507         ssr     a10             /* a10 is 32..63 here; only its low 5 bits reach SAR */
 508         sll     a11, xl         /* a11 = bits of xl lost below the rounding word */
 509         src     a9, xh, xl      /* a9 = rounding word taken from xh:xl */
 510         srl     xl, xh          /* what remains of xh becomes the new low word */
 511         slli    xh, a6, 11      /* set sign bit of xh */
 512         beqz    a11, .Lsub_subx /* nothing was lost below a9, so a9 is exact */
 513         or      a9, a9, a10     /* make a9 nonzero so the lost bits still count when rounding */
 514         j       .Lsub_subx
 515
 516 .Lsub_returny:
 517         /* Negate and return y.  */
 518         slli    a7, a6, 11      /* a7 = 0x80000000, the sign-bit mask */
 519         xor     xh, yh, a7
 520         mov     xl, yl
 521         leaf_return
 522
 523 .Lsub_borrow:
 524         /* The subtraction has underflowed into the exponent field, so the
 525            value needs to be renormalized.  Shift the mantissa left as
 526            needed to remove any leading zeros and adjust the exponent
 527            accordingly.  If the exponent is not large enough to remove
 528            all the leading zeros, the result will be a subnormal value.  */
 529
 530         slli    a8, xh, 12      /* a8 = high mantissa bits of xh, moved to the top of the word */
 531         beqz    a8, .Lsub_xhzero
 532         do_nsau a6, a8, a7, a11 /* macro defined outside this view; presumably a6 = leading-zero count of a8 (a6 is no longer the exponent mask) */
 533         srli    a8, a8, 12      /* move the mantissa bits back to their normal position */
 534         bge     a6, a10, .Lsub_subnormal        /* a10 = current exponent field of xh */
 535         addi    a6, a6, 1
 536
 537 .Lsub_shift_lt32:
 538         /* Shift the mantissa (a8/xl/a9) left by a6.  */
 539         ssl     a6
 540         src     a8, a8, xl
 541         src     xl, xl, a9
 542         sll     a9, a9
 543
 544         /* Combine the shifted mantissa with the sign and exponent,
 545            decrementing the exponent by a6.  (The exponent has already
 546            been decremented by one due to the borrow from the subtraction,
 547            but adding the mantissa will increment the exponent by one.)  */
 548         srli    xh, xh, 20      /* xh = sign/exponent field only */
 549         sub     xh, xh, a6
 550         slli    xh, xh, 20
 551         add     xh, xh, a8
 552         j       .Lsub_round
 553
 554 .Lsub_exactlyhalf:
 555         /* Round down to the nearest even value (xl has already been incremented).  */
 556         srli    xl, xl, 1       /* clear bit 0 of xl */
 557         slli    xl, xl, 1
 558         leaf_return
 559
 560 .Lsub_roundcarry:
 561         /* xl is always zero when the rounding increment overflows, so
 562            there's no need to round it to an even value.  */
 563         addi    xh, xh, 1       /* carry from the low word into the high word */
 564         /* Overflow to the exponent is OK.  */
 565         leaf_return
 566
 567 .Lsub_xhzero:
 568         /* When normalizing the result, all the mantissa bits in the high
 569            word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
 570         do_nsau a6, xl, a7, a11 /* macro defined outside this view; a6 = leading zero count of xl per the comment above */
 571         addi    a6, a6, 21
 572         blt     a10, a6, .Lsub_subnormal        /* a10 = current exponent field of xh */
 573
 574 .Lsub_normalize_shift:
 575         bltui   a6, 32, .Lsub_shift_lt32
 576
 577         ssl     a6              /* shift of 32 or more: move down one word, then shift by the low 5 bits of a6 */
 578         src     a8, xl, a9      /* new high mantissa comes from xl:a9 */
 579         sll     xl, a9          /* new low word comes from the leftover bits */
 580         movi    a9, 0           /* every leftover bit is now part of the result */
 581
 582         srli    xh, xh, 20      /* xh = sign/exponent field only */
 583         sub     xh, xh, a6
 584         slli    xh, xh, 20
 585         add     xh, xh, a8
 586         j       .Lsub_round
 587
 588 .Lsub_subnormal:
 589         /* The exponent is too small to shift away all the leading zeros.
 590            Set a6 to the current exponent (which has already been
 591            decremented by the borrow) so that the exponent of the result
 592            will be zero.  Do not add 1 to a6 in this case, because: (1)
 593            adding the mantissa will not increment the exponent, so there is
 594            no need to subtract anything extra from the exponent to
 595            compensate, and (2) the effective exponent of a subnormal is 1
 596            not 0 so the shift amount must be 1 smaller than normal. */
 597         mov     a6, a10         /* shift count = current exponent field */
 598         j       .Lsub_normalize_shift
 599
 600 #endif /* L_addsubdf3 */
 601
 602 #ifdef L_muldf3
 603
 604         /* Multiplication */
 605 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
 606 #define XCHAL_NO_MUL 1
 607 #endif
 608
 609         .literal_position
 610 __muldf3_aux:
 611
 612         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
 613            (This code is placed before the start of the function just to
 614            keep it in range of the limited branch displacements.)  */
 615
 616 .Lmul_xexpzero:            /* Entered from __muldf3 when x's exponent field (a8) is zero.  */
 617         /* Clear the sign bit of x.  */
 618         slli    xh, xh, 1
 619         srli    xh, xh, 1
 620
 621         /* If x is zero, return zero.  */
 622         or      a10, xh, xl
 623         beqz    a10, .Lmul_return_zero
 624
 625         /* Normalize x.  Adjust the exponent in a8.  */
 626         beqz    xh, .Lmul_xh_zero
 627         do_nsau a10, xh, a11, a12       /* macro defined outside this view; presumably a10 = leading-zero count of xh */
 628         addi    a10, a10, -11   /* a10 = left shift, presumably bringing the leading one to bit 20 */
 629         ssl     a10
 630         src     xh, xh, xl
 631         sll     xl, xl
 632         movi    a8, 1
 633         sub     a8, a8, a10     /* a8 = 1 - shift */
 634         j       .Lmul_xnormalized
 635 .Lmul_xh_zero:             /* All nonzero mantissa bits are in xl.  */
 636         do_nsau a10, xl, a11, a12
 637         addi    a10, a10, -11
 638         movi    a8, -31
 639         sub     a8, a8, a10     /* a8 = 1 - (32 + a10): the shift includes moving up a whole word */
 640         ssl     a10
 641         bltz    a10, .Lmul_xl_srl       /* negative a10: xl must be split between xh and xl */
 642         sll     xh, xl          /* xl moves into xh, shifted left by a10 */
 643         movi    xl, 0
 644         j       .Lmul_xnormalized
 645 .Lmul_xl_srl:
 646         srl     xh, xl          /* upper part of xl goes to xh ... */
 647         sll     xl, xl          /* ... and the remaining bits stay in xl, moved to the top */
 648         j       .Lmul_xnormalized
 649
 650 .Lmul_yexpzero:            /* Entered from __muldf3 when y's exponent field (a9) is zero.  */
 651         /* Clear the sign bit of y.  */
 652         slli    yh, yh, 1
 653         srli    yh, yh, 1
 654
 655         /* If y is zero, return zero.  */
 656         or      a10, yh, yl
 657         beqz    a10, .Lmul_return_zero
 658
 659         /* Normalize y.  Adjust the exponent in a9.  */
 660         beqz    yh, .Lmul_yh_zero
 661         do_nsau a10, yh, a11, a12       /* macro defined outside this view; presumably a10 = leading-zero count of yh */
 662         addi    a10, a10, -11   /* a10 = left shift, presumably bringing the leading one to bit 20 */
 663         ssl     a10
 664         src     yh, yh, yl
 665         sll     yl, yl
 666         movi    a9, 1
 667         sub     a9, a9, a10     /* a9 = 1 - shift */
 668         j       .Lmul_ynormalized
 669 .Lmul_yh_zero:             /* All nonzero mantissa bits are in yl.  */
 670         do_nsau a10, yl, a11, a12
 671         addi    a10, a10, -11
 672         movi    a9, -31
 673         sub     a9, a9, a10     /* a9 = 1 - (32 + a10): the shift includes moving up a whole word */
 674         ssl     a10
 675         bltz    a10, .Lmul_yl_srl       /* negative a10: yl must be split between yh and yl */
 676         sll     yh, yl          /* yl moves into yh, shifted left by a10 */
 677         movi    yl, 0
 678         j       .Lmul_ynormalized
 679 .Lmul_yl_srl:
 680         srl     yh, yl          /* upper part of yl goes to yh ... */
 681         sll     yl, yl          /* ... and the remaining bits stay in yl, moved to the top */
 682         j       .Lmul_ynormalized
 683
 684 .Lmul_return_zero:
 685         /* Return zero with the appropriate sign bit.  */
 686         srli    xh, a7, 31      /* a7 = xh ^ yh from the function entry; bit 31 is the product's sign */
 687         slli    xh, xh, 31
 688         movi    xl, 0
 689         j       .Lmul_done      /* .Lmul_done lies outside this view; presumably the common exit */
 690
 691 .Lmul_xnan_or_inf:
 692         /* If y is zero, return NaN.  */
 693         bnez    yl, 1f
 694         slli    a8, yh, 1       /* ignore y's sign when testing for zero */
 695         bnez    a8, 1f
 696         movi    a4, 0x80000     /* make it a quiet NaN */
 697         or      xh, xh, a4      /* x already has an all-ones exponent; setting bit 19 makes it a NaN */
 698         j       .Lmul_done
 699 1:
 700         /* If y is NaN, return y (even when x is a NaN as well).  */
 701         bnall   yh, a6, .Lmul_returnx
 702         slli    a8, yh, 12      /* drop sign and exponent, keeping the high mantissa bits */
 703         or      a8, a8, yl      /* a8 == 0 only when the whole mantissa is zero (y is Infinity) */
 704         beqz    a8, .Lmul_returnx
 705
 706 .Lmul_returny:
 707         mov     xh, yh
 708         mov     xl, yl
 709
 710 .Lmul_returnx:
 711         /* Set the sign bit and return.  */
 712         extui   a7, a7, 31, 1   /* a7 = sign of the product, in bit 0 */
 713         slli    xh, xh, 1       /* discard the old sign bit of xh */
 714         ssai    1
 715         src     xh, a7, xh      /* shift back right by one, pulling the new sign into bit 31 */
 716         j       .Lmul_done
 717
 718 .Lmul_ynan_or_inf:
 719         /* If x is zero, return NaN.  */
 720         bnez    xl, .Lmul_returny
 721         slli    a8, xh, 1       /* ignore x's sign when testing for zero */
 722         bnez    a8, .Lmul_returny
 723         movi    a7, 0x80000     /* make it a quiet NaN */
 724         or      xh, yh, a7      /* y's high word with bit 19 set; xl stays zero */
 725         j       .Lmul_done
 726
 727         .align  4
 728         .global __muldf3
 729         .type   __muldf3, @function
 730 __muldf3:
 731 #if __XTENSA_CALL0_ABI__
 732         leaf_entry sp, 32
 733         addi    sp, sp, -32
 734         s32i    a12, sp, 16
 735         s32i    a13, sp, 20
 736         s32i    a14, sp, 24
 737         s32i    a15, sp, 28
 738 #elif XCHAL_NO_MUL
 739         /* This is not really a leaf function; allocate enough stack space
 740            to allow CALL12s to a helper function.  */
 741         leaf_entry sp, 64
 742 #else
 743         leaf_entry sp, 32
 744 #endif
 745         movi    a6, 0x7ff00000
 746
 747         /* Get the sign of the result.  */
 748         xor     a7, xh, yh
 749
 750         /* Check for NaN and infinity.  */
 751         ball    xh, a6, .Lmul_xnan_or_inf
 752         ball    yh, a6, .Lmul_ynan_or_inf
 753
 754         /* Extract the exponents.  */
 755         extui   a8, xh, 20, 11
 756         extui   a9, yh, 20, 11
 757
 758         beqz    a8, .Lmul_xexpzero
 759 .Lmul_xnormalized:
 760         beqz    a9, .Lmul_yexpzero
 761 .Lmul_ynormalized:
 762
 763         /* Add the exponents.  */
 764         add     a8, a8, a9
 765
 766         /* Replace sign/exponent fields with explicit "1.0".  */
 767         movi    a10, 0x1fffff
 768         or      xh, xh, a6
 769         and     xh, xh, a10
 770         or      yh, yh, a6
 771         and     yh, yh, a10
 772
 773         /* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
 774            The least-significant word of the result is thrown away except
 775            that if it is nonzero, the lsb of a6 is set to 1.  */
 776 #if XCHAL_HAVE_MUL32_HIGH
 777
 778         /* Compute a6 with any carry-outs in a10.  */
 779         movi    a10, 0
 780         mull    a6, xl, yh
 781         mull    a11, xh, yl
 782         add     a6, a6, a11
 783         bgeu    a6, a11, 1f
 784         addi    a10, a10, 1
 785 1:
 786         muluh   a11, xl, yl
 787         add     a6, a6, a11
 788         bgeu    a6, a11, 1f
 789         addi    a10, a10, 1
 790 1:
 791         /* If the low word of the result is nonzero, set the lsb of a6.  */
 792         mull    a11, xl, yl
 793         beqz    a11, 1f
 794         movi    a9, 1
 795         or      a6, a6, a9
 796 1:
 797         /* Compute xl with any carry-outs in a9.  */
 798         movi    a9, 0
 799         mull    a11, xh, yh
 800         add     a10, a10, a11
 801         bgeu    a10, a11, 1f
 802         addi    a9, a9, 1
 803 1:
 804         muluh   a11, xh, yl
 805         add     a10, a10, a11
 806         bgeu    a10, a11, 1f
 807         addi    a9, a9, 1
 808 1:
 809         muluh   xl, xl, yh
 810         add     xl, xl, a10
 811         bgeu    xl, a10, 1f
 812         addi    a9, a9, 1
 813 1:
 814         /* Compute xh.  */
 815         muluh   xh, xh, yh
 816         add     xh, xh, a9
 817
 818 #else /* ! XCHAL_HAVE_MUL32_HIGH */
 819
 820         /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
 821            products.  These partial products are:
 822
 823                 0 xll * yll
 824
 825                 1 xll * ylh
 826                 2 xlh * yll
 827
 828                 3 xll * yhl
 829                 4 xlh * ylh
 830                 5 xhl * yll
 831
 832                 6 xll * yhh
 833                 7 xlh * yhl
 834                 8 xhl * ylh
 835                 9 xhh * yll
 836
 837                 10 xlh * yhh
 838                 11 xhl * yhl
 839                 12 xhh * ylh
 840
 841                 13 xhl * yhh
 842                 14 xhh * yhl
 843
 844                 15 xhh * yhh
 845
 846            where the input chunks are (hh, hl, lh, ll).  If using the Mul16
 847            or Mul32 multiplier options, these input chunks must be stored in
 848            separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
 849            that the inputs come from either half of the registers, so there
 850            is no need to shift them out ahead of time.  If there is no
 851            multiply hardware, the 16-bit chunks can be extracted when setting
 852            up the arguments to the separate multiply function.  */
 853
 854         /* Save a7 since it is needed to hold a temporary value.  */
 855         s32i    a7, sp, 4
 856 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
 857         /* Calling a separate multiply function will clobber a0 and requires
 858            use of a8 as a temporary, so save those values now.  (The function
 859            uses a custom ABI so nothing else needs to be saved.)  */
 860         s32i    a0, sp, 0
 861         s32i    a8, sp, 8
 862 #endif
 863
 864 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
 865
 866 #define xlh a12
 867 #define ylh a13
 868 #define xhh a14
 869 #define yhh a15
 870
 871         /* Get the high halves of the inputs into registers.  */
 872         srli    xlh, xl, 16
 873         srli    ylh, yl, 16
 874         srli    xhh, xh, 16
 875         srli    yhh, yh, 16
 876
 877 #define xll xl
 878 #define yll yl
 879 #define xhl xh
 880 #define yhl yh
 881
 882 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
 883         /* Clear the high halves of the inputs.  This does not matter
 884            for MUL16 because the high bits are ignored.  */
 885         extui   xl, xl, 0, 16
 886         extui   xh, xh, 0, 16
 887         extui   yl, yl, 0, 16
 888         extui   yh, yh, 0, 16
 889 #endif
 890 #endif /* MUL16 || MUL32 */
 891
 892
 893 #if XCHAL_HAVE_MUL16
 894
 895 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 896         mul16u  dst, xreg ## xhalf, yreg ## yhalf
 897
 898 #elif XCHAL_HAVE_MUL32
 899
 900 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 901         mull    dst, xreg ## xhalf, yreg ## yhalf
 902
 903 #elif XCHAL_HAVE_MAC16
 904
 905 /* The preprocessor insists on inserting a space when concatenating after
 906    a period in the definition of do_mul below.  These macros are a workaround
 907    using underscores instead of periods when doing the concatenation.  */
 908 #define umul_aa_ll umul.aa.ll
 909 #define umul_aa_lh umul.aa.lh
 910 #define umul_aa_hl umul.aa.hl
 911 #define umul_aa_hh umul.aa.hh
 912
 913 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 914         umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
 915         rsr     dst, ACCLO
 916
 917 #else /* no multiply hardware */
 918
 919 #define set_arg_l(dst, src) \
 920         extui   dst, src, 0, 16
 921 #define set_arg_h(dst, src) \
 922         srli    dst, src, 16
 923
 924 #if __XTENSA_CALL0_ABI__
 925 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 926         set_arg_ ## xhalf (a13, xreg); \
 927         set_arg_ ## yhalf (a14, yreg); \
 928         call0   .Lmul_mulsi3; \
 929         mov     dst, a12
 930 #else
 931 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 932         set_arg_ ## xhalf (a14, xreg); \
 933         set_arg_ ## yhalf (a15, yreg); \
 934         call12  .Lmul_mulsi3; \
 935         mov     dst, a14
 936 #endif /* __XTENSA_CALL0_ABI__ */
 937
 938 #endif /* no multiply hardware */
 939
 940         /* Add pp1 and pp2 into a10 with carry-out in a9.  */
 941         do_mul(a10, xl, l, yl, h)       /* pp 1 */
 942         do_mul(a11, xl, h, yl, l)       /* pp 2 */
 943         movi    a9, 0
 944         add     a10, a10, a11
 945         bgeu    a10, a11, 1f
 946         addi    a9, a9, 1
 947 1:
 948         /* Initialize a6 with a9/a10 shifted into position.  Note that
 949            this value can be safely incremented without any carry-outs.  */
 950         ssai    16
 951         src     a6, a9, a10
 952
 953         /* Compute the low word into a10.  */
 954         do_mul(a11, xl, l, yl, l)       /* pp 0 */
 955         sll     a10, a10
 956         add     a10, a10, a11
 957         bgeu    a10, a11, 1f
 958         addi    a6, a6, 1
 959 1:
 960         /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
 961            This is good enough to determine the low half of a6, so that any
 962            nonzero bits from the low word of the result can be collapsed
 963            into a6, freeing up a register.  */
 964         movi    a9, 0
 965         do_mul(a11, xl, l, yh, l)       /* pp 3 */
 966         add     a6, a6, a11
 967         bgeu    a6, a11, 1f
 968         addi    a9, a9, 1
 969 1:
 970         do_mul(a11, xl, h, yl, h)       /* pp 4 */
 971         add     a6, a6, a11
 972         bgeu    a6, a11, 1f
 973         addi    a9, a9, 1
 974 1:
 975         do_mul(a11, xh, l, yl, l)       /* pp 5 */
 976         add     a6, a6, a11
 977         bgeu    a6, a11, 1f
 978         addi    a9, a9, 1
 979 1:
 980         /* Collapse any nonzero bits from the low word into a6.  */
 981         beqz    a10, 1f
 982         movi    a11, 1
 983         or      a6, a6, a11
 984 1:
 985         /* Add pp6-9 into a11 with carry-outs in a10.  */
 986         do_mul(a7, xl, l, yh, h)        /* pp 6 */
 987         do_mul(a11, xh, h, yl, l)       /* pp 9 */
 988         movi    a10, 0
 989         add     a11, a11, a7
 990         bgeu    a11, a7, 1f
 991         addi    a10, a10, 1
 992 1:
 993         do_mul(a7, xl, h, yh, l)        /* pp 7 */
 994         add     a11, a11, a7
 995         bgeu    a11, a7, 1f
 996         addi    a10, a10, 1
 997 1:
 998         do_mul(a7, xh, l, yl, h)        /* pp 8 */
 999         add     a11, a11, a7
1000         bgeu    a11, a7, 1f
1001         addi    a10, a10, 1
1002 1:
1003         /* Shift a10/a11 into position, and add low half of a11 to a6.  */
1004         src     a10, a10, a11
1005         add     a10, a10, a9
1006         sll     a11, a11
1007         add     a6, a6, a11
1008         bgeu    a6, a11, 1f
1009         addi    a10, a10, 1
1010 1:
1011         /* Add pp10-12 into xl with carry-outs in a9.  */
1012         movi    a9, 0
1013         do_mul(xl, xl, h, yh, h)        /* pp 10 */
1014         add     xl, xl, a10
1015         bgeu    xl, a10, 1f
1016         addi    a9, a9, 1
1017 1:
1018         do_mul(a10, xh, l, yh, l)       /* pp 11 */
1019         add     xl, xl, a10
1020         bgeu    xl, a10, 1f
1021         addi    a9, a9, 1
1022 1:
1023         do_mul(a10, xh, h, yl, h)       /* pp 12 */
1024         add     xl, xl, a10
1025         bgeu    xl, a10, 1f
1026         addi    a9, a9, 1
1027 1:
1028         /* Add pp13-14 into a11 with carry-outs in a10.  */
1029         do_mul(a11, xh, l, yh, h)       /* pp 13 */
1030         do_mul(a7, xh, h, yh, l)        /* pp 14 */
1031         movi    a10, 0
1032         add     a11, a11, a7
1033         bgeu    a11, a7, 1f
1034         addi    a10, a10, 1
1035 1:
1036         /* Shift a10/a11 into position, and add low half of a11 to xl.  */
1037         src     a10, a10, a11
1038         add     a10, a10, a9
1039         sll     a11, a11
1040         add     xl, xl, a11
1041         bgeu    xl, a11, 1f
1042         addi    a10, a10, 1
1043 1:
1044         /* Compute xh.  */
1045         do_mul(xh, xh, h, yh, h)        /* pp 15 */
1046         add     xh, xh, a10
1047
1048         /* Restore values saved on the stack during the multiplication.  */
1049         l32i    a7, sp, 4
1050 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
1051         l32i    a0, sp, 0
1052         l32i    a8, sp, 8
1053 #endif
1054 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
1055
1056         /* Shift left by 12 bits, unless there was a carry-out from the
1057            multiply, in which case, shift by 11 bits and increment the
1058            exponent.  Note: It is convenient to use the constant 0x3ff
1059            instead of 0x400 when removing the extra exponent bias (so that
1060            it is easy to construct 0x7fe for the overflow check).  Reverse
1061            the logic here to decrement the exponent sum by one unless there
1062            was a carry-out.  */
1063         movi    a4, 11
1064         srli    a5, xh, 21 - 12
1065         bnez    a5, 1f
1066         addi    a4, a4, 1
1067         addi    a8, a8, -1
1068 1:      ssl     a4
1069         src     xh, xh, xl
1070         src     xl, xl, a6
1071         sll     a6, a6
1072
1073         /* Subtract the extra bias from the exponent sum (plus one to account
1074            for the explicit "1.0" of the mantissa that will be added to the
1075            exponent in the final result).  */
1076         movi    a4, 0x3ff
1077         sub     a8, a8, a4
1078
1079         /* Check for over/underflow.  The value in a8 is one less than the
1080            final exponent, so values in the range 0..7fd are OK here.  */
1081         slli    a4, a4, 1       /* 0x7fe */
1082         bgeu    a8, a4, .Lmul_overflow
1083
1084 .Lmul_round:
1085         /* Round.  */
1086         bgez    a6, .Lmul_rounded
1087         addi    xl, xl, 1
1088         beqz    xl, .Lmul_roundcarry
1089         slli    a6, a6, 1
1090         beqz    a6, .Lmul_exactlyhalf
1091
1092 .Lmul_rounded:
1093         /* Add the exponent to the mantissa.  */
1094         slli    a8, a8, 20
1095         add     xh, xh, a8
1096
1097 .Lmul_addsign:
1098         /* Add the sign bit.  */
1099         srli    a7, a7, 31
1100         slli    a7, a7, 31
1101         or      xh, xh, a7
1102
1103 .Lmul_done:
1104 #if __XTENSA_CALL0_ABI__
1105         l32i    a12, sp, 16
1106         l32i    a13, sp, 20
1107         l32i    a14, sp, 24
1108         l32i    a15, sp, 28
1109         addi    sp, sp, 32
1110 #endif
1111         leaf_return
1112
1113 .Lmul_exactlyhalf:
1114         /* Round down to the nearest even value.  */
1115         srli    xl, xl, 1
1116         slli    xl, xl, 1
1117         j       .Lmul_rounded
1118
1119 .Lmul_roundcarry:
1120         /* xl is always zero when the rounding increment overflows, so
1121            there's no need to round it to an even value.  */
1122         addi    xh, xh, 1
1123         /* Overflow is OK -- it will be added to the exponent.  */
1124         j       .Lmul_rounded
1125
1126 .Lmul_overflow:
1127         bltz    a8, .Lmul_underflow
1128         /* Return +/- Infinity.  */
1129         addi    a8, a4, 1       /* 0x7ff */
1130         slli    xh, a8, 20
1131         movi    xl, 0
1132         j       .Lmul_addsign
1133
1134 .Lmul_underflow:
1135         /* Create a subnormal value, where the exponent field contains zero,
1136            but the effective exponent is 1.  The value of a8 is one less than
1137            the actual exponent, so just negate it to get the shift amount.  */
1138         neg     a8, a8
1139         mov     a9, a6
1140         ssr     a8
1141         bgeui   a8, 32, .Lmul_bigshift
1142
1143         /* Shift xh/xl right.  Any bits that are shifted out of xl are saved
1144            in a6 (combined with the shifted-out bits currently in a6) for
1145            rounding the result.  */
1146         sll     a6, xl
1147         src     xl, xh, xl
1148         srl     xh, xh
1149         j       1f
1150
1151 .Lmul_bigshift:
1152         bgeui   a8, 64, .Lmul_flush_to_zero
1153         sll     a10, xl         /* lost bits shifted out of xl */
1154         src     a6, xh, xl
1155         srl     xl, xh
1156         movi    xh, 0
1157         or      a9, a9, a10
1158
1159         /* Set the exponent to zero.  */
1160 1:      movi    a8, 0
1161
1162         /* Pack any nonzero bits shifted out into a6.  */
1163         beqz    a9, .Lmul_round
1164         movi    a9, 1
1165         or      a6, a6, a9
1166         j       .Lmul_round
1167
1168 .Lmul_flush_to_zero:
1169         /* Return zero with the appropriate sign bit.  */
1170         srli    xh, a7, 31
1171         slli    xh, xh, 31
1172         movi    xl, 0
1173         j       .Lmul_done
1174
1175 #if XCHAL_NO_MUL
1176
1177         /* For Xtensa processors with no multiply hardware, this simplified
1178            version of _mulsi3 is used for multiplying 16-bit chunks of
1179            the floating-point mantissas.  When using CALL0, this function
1180            uses a custom ABI: the inputs are passed in a13 and a14, the
1181            result is returned in a12, and a8 and a15 are clobbered.
                Otherwise the do_mul macro reaches it with call12, placing the
                operands in the caller's a14/a15 and reading the product back
                from the caller's a14; the body then works on a2/a3 (with the
                first operand copied to a4) and uses a5/a6 as scratch.

                The product is accumulated by shift-and-add: each pass looks
                at the low four bits of the first operand, then shifts that
                operand right and the second operand left by four.  The loop
                ends once the first operand has been shifted down to zero.  */
1182         .align  4
1183 .Lmul_mulsi3:
1184         leaf_entry sp, 16
1185         .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
1186         movi    \dst, 0
1187 1:      add     \tmp1, \src2, \dst         /* candidate: dst + src2 */
1188         extui   \tmp2, \src1, 0, 1         /* bit 0 of src1 */
1189         movnez  \dst, \tmp1, \tmp2         /* keep the sum only if the bit is set */
1190
             /* NOTE(review): do_addx2/4/8 are defined outside this section;
                presumably they form dst + 2/4/8 * src2 in tmp1 -- confirm.  */
1191         do_addx2 \tmp1, \src2, \dst, \tmp1
1192         extui   \tmp2, \src1, 1, 1         /* bit 1 of src1 */
1193         movnez  \dst, \tmp1, \tmp2
1194
1195         do_addx4 \tmp1, \src2, \dst, \tmp1
1196         extui   \tmp2, \src1, 2, 1         /* bit 2 of src1 */
1197         movnez  \dst, \tmp1, \tmp2
1198
1199         do_addx8 \tmp1, \src2, \dst, \tmp1
1200         extui   \tmp2, \src1, 3, 1         /* bit 3 of src1 */
1201         movnez  \dst, \tmp1, \tmp2
1202
1203         srli    \src1, \src1, 4            /* drop the four bits just handled */
1204         slli    \src2, \src2, 4            /* scale src2 for the next four bits */
1205         bnez    \src1, 1b                  /* repeat while src1 is nonzero */
1206         .endm
1207 #if __XTENSA_CALL0_ABI__
1208         mul_mulsi3_body a12, a13, a14, a15, a8
1209 #else
1210         /* The result will be written into a2, so save that argument in a4.  */
1211         mov     a4, a2
1212         mul_mulsi3_body a2, a4, a3, a5, a6
1213 #endif
1214         leaf_return
1215 #endif /* XCHAL_NO_MUL */
1216 #endif /* L_muldf3 */
1217
1218 #ifdef L_divdf3
1219
1220         /* Division */
1221
1222 #if XCHAL_HAVE_DFP_DIV
1223
1224         .text
1225         .align 4
1226         .global __divdf3
1227         .type   __divdf3, @function
             /* __divdf3 for configurations where XCHAL_HAVE_DFP_DIV is set.
                x is taken from xh/xl and y from yh/yl; both are moved into
                FP registers, the quotient is formed in f0 by the instruction
                sequence below, and f0 is moved back into xh/xl.

                NOTE(review): the sequence looks like a reciprocal seed
                (div0.d) refined with multiply-add steps (maddn.d) and a final
                exponent adjustment, but the exact semantics of div0.d,
                nexp01.d, mkdadj.d, addexp.d, addexpm.d and divn.d are not
                visible in this file -- confirm against the Xtensa ISA
                reference before reordering or editing any step.  */
1228 __divdf3:
1229         leaf_entry      sp, 16
1230
1231         wfrd            f1, xh, xl         /* f1 receives x */
1232         wfrd            f2, yh, yl         /* f2 receives y */
1233
1234         div0.d          f3, f2
1235         nexp01.d        f4, f2
1236         const.d         f0, 1
1237         maddn.d         f0, f4, f3
1238         const.d         f5, 0
1239         mov.d           f7, f2
1240         mkdadj.d        f7, f1
1241         maddn.d         f3, f0, f3
1242         maddn.d         f5, f0, f0
1243         nexp01.d        f1, f1
1244         div0.d          f2, f2
1245         maddn.d         f3, f5, f3
1246         const.d         f5, 1
1247         const.d         f0, 0
1248         neg.d           f6, f1
1249         maddn.d         f5, f4, f3
1250         maddn.d         f0, f6, f2
1251         maddn.d         f3, f5, f3
1252         maddn.d         f6, f4, f0
1253         const.d         f2, 1
1254         maddn.d         f2, f4, f3
1255         maddn.d         f0, f6, f3
1256         neg.d           f1, f1
1257         maddn.d         f3, f2, f3
1258         maddn.d         f1, f4, f0
1259         addexpm.d       f0, f7
1260         addexp.d        f3, f7
1261         divn.d          f0, f1, f3
1262
             /* Move the result in f0 back to the integer return registers.  */
1263         rfr             xl, f0
1264         rfrd            xh, f0
1265
1266         leaf_return
1267
1268 #else
1269
1270         .literal_position
1271
1272 __divdf3_aux:
1273
1274         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
1275            (This code is placed before the start of the function just to
1276            keep it in range of the limited branch displacements.)

                On entry to every handler in this group a6 holds 0x7ff00000
                and a7 holds xh ^ yh (bit 31 is the sign of the result), both
                set up at the __divdf3 entry point; a8 and a9 hold the raw
                exponent fields of x and y.  */
1277
1278 .Ldiv_yexpzero:
             /* The exponent field of y is zero: y is either zero or
                subnormal.  */
1279         /* Clear the sign bit of y.  */
1280         slli    yh, yh, 1
1281         srli    yh, yh, 1
1282
1283         /* Check for division by zero.  */
1284         or      a10, yh, yl
1285         beqz    a10, .Ldiv_yzero
1286
1287         /* Normalize y.  Adjust the exponent in a9.  */
1288         beqz    yh, .Ldiv_yh_zero
             /* NOTE(review): do_nsau is defined outside this section;
                presumably it leaves the count of leading zero bits of its
                source in a10, so that a10 - 11 is the left shift that brings
                the leading one to bit 20 -- confirm.  */
1289         do_nsau a10, yh, a11, a9
1290         addi    a10, a10, -11
1291         ssl     a10
1292         src     yh, yh, yl                 /* shift yh/yl left as a pair */
1293         sll     yl, yl
1294         movi    a9, 1
1295         sub     a9, a9, a10                /* exponent = 1 - shift */
1296         j       .Ldiv_ynormalized
1297 .Ldiv_yh_zero:
             /* Only yl is nonzero, so the mantissa has to move up by a whole
                word plus or minus a few bits.  */
1298         do_nsau a10, yl, a11, a9
1299         addi    a10, a10, -11
1300         movi    a9, -31
1301         sub     a9, a9, a10                /* exponent = -31 - shift */
1302         ssl     a10
1303         bltz    a10, .Ldiv_yl_srl          /* negative: the top of yl moves right into yh */
1304         sll     yh, yl
1305         movi    yl, 0
1306         j       .Ldiv_ynormalized
1307 .Ldiv_yl_srl:
1308         srl     yh, yl
1309         sll     yl, yl
1310         j       .Ldiv_ynormalized
1311
1312 .Ldiv_yzero:
1313         /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
1314         slli    xh, xh, 1
1315         srli    xh, xh, 1
1316         or      xl, xl, xh                 /* xl == 0 iff x is +/-0 */
1317         srli    xh, a7, 31
1318         slli    xh, xh, 31                 /* xh = sign of the result */
1319         or      xh, xh, a6                 /* exponent all ones: Infinity */
1320         bnez    xl, 1f
1321         movi    a4, 0x80000     /* make it a quiet NaN */
1322         or      xh, xh, a4
1323 1:      movi    xl, 0
1324         leaf_return
1325
1326 .Ldiv_xexpzero:
             /* The exponent field of x is zero: x is either zero or
                subnormal.  Mirrors .Ldiv_yexpzero, using a8 for the
                exponent.  */
1327         /* Clear the sign bit of x.  */
1328         slli    xh, xh, 1
1329         srli    xh, xh, 1
1330
1331         /* If x is zero, return zero.  */
1332         or      a10, xh, xl
1333         beqz    a10, .Ldiv_return_zero
1334
1335         /* Normalize x.  Adjust the exponent in a8.  */
1336         beqz    xh, .Ldiv_xh_zero
1337         do_nsau a10, xh, a11, a8
1338         addi    a10, a10, -11
1339         ssl     a10
1340         src     xh, xh, xl                 /* shift xh/xl left as a pair */
1341         sll     xl, xl
1342         movi    a8, 1
1343         sub     a8, a8, a10                /* exponent = 1 - shift */
1344         j       .Ldiv_xnormalized
1345 .Ldiv_xh_zero:
1346         do_nsau a10, xl, a11, a8
1347         addi    a10, a10, -11
1348         movi    a8, -31
1349         sub     a8, a8, a10                /* exponent = -31 - shift */
1350         ssl     a10
1351         bltz    a10, .Ldiv_xl_srl          /* negative: the top of xl moves right into xh */
1352         sll     xh, xl
1353         movi    xl, 0
1354         j       .Ldiv_xnormalized
1355 .Ldiv_xl_srl:
1356         srl     xh, xl
1357         sll     xl, xl
1358         j       .Ldiv_xnormalized
1359
1360 .Ldiv_return_zero:
1361         /* Return zero with the appropriate sign bit.  */
1362         srli    xh, a7, 31
1363         slli    xh, xh, 31
1364         movi    xl, 0
1365         leaf_return
1366
1367 .Ldiv_xnan_or_inf:
             /* x is NaN or Infinity.  a6 holds 0x7ff00000 (loaded at the
                __divdf3 entry point).  a8 is free to use as a temporary here
                because the exponents have not been extracted yet.  */
1368         /* Set the sign bit of the result.  */
1369         srli    a7, yh, 31
1370         slli    a7, a7, 31
1371         xor     xh, xh, a7
1372         /* If y is NaN or Inf, return NaN.  */
1373         ball    yh, a6, .Ldiv_return_nan
             /* y is finite.  A nonzero mantissa means x is a NaN, which must
                be quieted before it is returned; otherwise x is Infinity and
                the signed value already in xh/xl is the result.  */
             slli    a8, xh, 12
             or      a8, a8, xl
             bnez    a8, .Ldiv_return_nan
             leaf_return
1377
1378 .Ldiv_ynan_or_inf:
             /* y is NaN or Infinity and x is neither (x was checked first).  */
1379         /* If y is Infinity, return zero.  */
1380         slli    a8, yh, 12
1381         or      a8, a8, yl
1382         beqz    a8, .Ldiv_return_zero
1383         /* y is NaN; return it.  */
1384         mov     xh, yh
1385         mov     xl, yl
             /* Fall through so that a signaling NaN is quieted.  */

     .Ldiv_return_nan:
             /* Common exit for every NaN result: xh/xl already hold the NaN
                (or Infinity pattern) to return.  y is no longer needed, so
                a4 may be clobbered.  */
             movi    a4, 0x80000     /* make it a quiet NaN */
             or      xh, xh, a4
             leaf_return
1387
1388 .Ldiv_highequal1:
             /* Out-of-line part of the first-digit check in __divdf3: the
                high words are equal, so the low words decide whether x must
                be shifted (2f) or not (3f).  */
1389         bltu    xl, yl, 2f
1390         j       3f
1391
1392         .align  4
1393         .global __divdf3
1394         .type   __divdf3, @function
             /* __divdf3: double-precision division x / y done with integer
                instructions.  x arrives in xh/xl and y in yh/yl; the quotient
                is returned in xh/xl.

                Register roles on the main path:
                  a6       0x7ff00000 (exponent mask)
                  a7       xh ^ yh; bit 31 is the sign of the result
                  a8       exponent of x, then the result exponent
                  a9       exponent of y, then the loop counter
                  a10/a11  quotient, high/low word
                  xh/xl    running remainder once the division starts  */
1395 __divdf3:
1396         leaf_entry sp, 16
1397         movi    a6, 0x7ff00000
1398
1399         /* Get the sign of the result.  */
1400         xor     a7, xh, yh
1401
1402         /* Check for NaN and infinity.  */
1403         ball    xh, a6, .Ldiv_xnan_or_inf
1404         ball    yh, a6, .Ldiv_ynan_or_inf
1405
1406         /* Extract the exponents.  */
1407         extui   a8, xh, 20, 11
1408         extui   a9, yh, 20, 11
1409
             /* A zero exponent field means zero or subnormal; the handlers
                either return directly or normalize the operand and jump back
                to the label that follows the branch.  */
1410         beqz    a9, .Ldiv_yexpzero
1411 .Ldiv_ynormalized:
1412         beqz    a8, .Ldiv_xexpzero
1413 .Ldiv_xnormalized:
1414
1415         /* Subtract the exponents.  */
1416         sub     a8, a8, a9
1417
1418         /* Replace sign/exponent fields with explicit "1.0".  */
1419         movi    a10, 0x1fffff
1420         or      xh, xh, a6                 /* sets bits 20..30, including bit 20 */
1421         and     xh, xh, a10                /* keeps bits 0..20 only */
1422         or      yh, yh, a6
1423         and     yh, yh, a10
1424
1425         /* Set SAR for left shift by one.  */
1426         ssai    (32 - 1)
1427
1428         /* The first digit of the mantissa division must be a one.
1429            Shift x (and adjust the exponent) as needed to make this true.  */
1430         bltu    yh, xh, 3f
1431         beq     yh, xh, .Ldiv_highequal1
1432 2:      src     xh, xh, xl                 /* x < y: double x ... */
1433         sll     xl, xl
1434         addi    a8, a8, -1                 /* ... and compensate in the exponent */
1435 3:
1436         /* Do the first subtraction and shift.  */
1437         sub     xh, xh, yh
1438         bgeu    xl, yl, 1f
1439         addi    xh, xh, -1                 /* borrow from the low-word subtraction */
1440 1:      sub     xl, xl, yl
1441         src     xh, xh, xl
1442         sll     xl, xl
1443
1444         /* Put the quotient into a10/a11.  */
1445         movi    a10, 0
1446         movi    a11, 1                     /* the leading one produced above */
1447
1448         /* Divide one bit at a time for 52 bits.  */
1449         movi    a9, 52
1450 #if XCHAL_HAVE_LOOPS
1451         loop    a9, .Ldiv_loopend
1452 #endif
1453 .Ldiv_loop:
1454         /* Shift the quotient << 1.  */
1455         src     a10, a10, a11
1456         sll     a11, a11
1457
1458         /* Is this digit a 0 or 1?  */
1459         bltu    xh, yh, 3f
1460         beq     xh, yh, .Ldiv_highequal2
1461
1462         /* Output a 1 and subtract.  */
1463 2:      addi    a11, a11, 1
1464         sub     xh, xh, yh
1465         bgeu    xl, yl, 1f
1466         addi    xh, xh, -1                 /* borrow from the low-word subtraction */
1467 1:      sub     xl, xl, yl
1468
1469         /* Shift the dividend << 1.  */
1470 3:      src     xh, xh, xl
1471         sll     xl, xl
1472
1473 #if !XCHAL_HAVE_LOOPS
1474         addi    a9, a9, -1
1475         bnez    a9, .Ldiv_loop
1476 #endif
1477 .Ldiv_loopend:
1478
1479         /* Add the exponent bias (less one to account for the explicit "1.0"
1480            of the mantissa that will be added to the exponent in the final
1481            result).  */
1482         movi    a9, 0x3fe
1483         add     a8, a8, a9
1484
1485         /* Check for over/underflow.  The value in a8 is one less than the
1486            final exponent, so values in the range 0..7fd are OK here.  */
1487         addmi   a9, a9, 0x400   /* 0x7fe */
1488         bgeu    a8, a9, .Ldiv_overflow     /* unsigned compare also catches negative a8 */
1489
1490 .Ldiv_round:
1491         /* Round.  The remainder (<< 1) is in xh/xl.  */
             /* Comparing twice the remainder with y decides the rounding:
                below y rounds down, above y rounds up, equal is a tie.  */
1492         bltu    xh, yh, .Ldiv_rounded
1493         beq     xh, yh, .Ldiv_highequal3
1494 .Ldiv_roundup:
1495         addi    a11, a11, 1
1496         beqz    a11, .Ldiv_roundcarry
1497
1498 .Ldiv_rounded:
1499         mov     xl, a11
1500         /* Add the exponent to the mantissa.  */
1501         slli    a8, a8, 20
1502         add     xh, a10, a8
1503
1504 .Ldiv_addsign:
1505         /* Add the sign bit.  */
1506         srli    a7, a7, 31
1507         slli    a7, a7, 31
1508         or      xh, xh, a7
1509         leaf_return
1510
1511 .Ldiv_highequal2:
             /* Out-of-line part of the digit test in the loop: the high
                words are equal, so the low words decide.  */
1512         bgeu    xl, yl, 2b
1513         j       3b
1514
1515 .Ldiv_highequal3:
             /* Out-of-line part of the rounding test: the high words are
                equal, so the low words decide.  */
1516         bltu    xl, yl, .Ldiv_rounded
1517         bne     xl, yl, .Ldiv_roundup
1518
1519         /* Remainder is exactly half the divisor.  Round even.  */
1520         addi    a11, a11, 1
1521         beqz    a11, .Ldiv_roundcarry
1522         srli    a11, a11, 1
1523         slli    a11, a11, 1                /* clear bit 0 */
1524         j       .Ldiv_rounded
1525
1526 .Ldiv_overflow:
1527         bltz    a8, .Ldiv_underflow
1528         /* Return +/- Infinity.  */
1529         addi    a8, a9, 1       /* 0x7ff */
1530         slli    xh, a8, 20
1531         movi    xl, 0
1532         j       .Ldiv_addsign
1533
1534 .Ldiv_underflow:
1535         /* Create a subnormal value, where the exponent field contains zero,
1536            but the effective exponent is 1.  The value of a8 is one less than
1537            the actual exponent, so just negate it to get the shift amount.  */
1538         neg     a8, a8
1539         ssr     a8
1540         bgeui   a8, 32, .Ldiv_bigshift
1541
1542         /* Shift a10/a11 right.  Any bits that are shifted out of a11 are
1543            saved in a6 for rounding the result.  */
1544         sll     a6, a11
1545         src     a11, a10, a11
1546         srl     a10, a10
1547         j       1f
1548
1549 .Ldiv_bigshift:
             /* Shift by 32..63: the quotient moves down by at least a whole
                word; a shift of 64 or more leaves nothing.  */
1550         bgeui   a8, 64, .Ldiv_flush_to_zero
1551         sll     a9, a11         /* lost bits shifted out of a11 */
1552         src     a6, a10, a11
1553         srl     a11, a10
1554         movi    a10, 0
1555         or      xl, xl, a9                 /* fold the lost bits into the remainder */
1556
1557         /* Set the exponent to zero.  */
1558 1:      movi    a8, 0
1559
1560         /* Pack any nonzero remainder (in xh/xl) into a6.  */
1561         or      xh, xh, xl
1562         beqz    xh, 1f
1563         movi    a9, 1
1564         or      a6, a6, a9
1565
1566         /* Round a10/a11 based on the bits shifted out into a6.  */
1567 1:      bgez    a6, .Ldiv_rounded          /* top bit clear: below half, round down */
1568         addi    a11, a11, 1
1569         beqz    a11, .Ldiv_roundcarry
1570         slli    a6, a6, 1
1571         bnez    a6, .Ldiv_rounded          /* more bits set: above half, keep the increment */
1572         srli    a11, a11, 1
1573         slli    a11, a11, 1                /* exactly half: round to even */
1574         j       .Ldiv_rounded
1575
1576 .Ldiv_roundcarry:
1577         /* a11 is always zero when the rounding increment overflows, so
1578            there's no need to round it to an even value.  */
1579         addi    a10, a10, 1
1580         /* Overflow to the exponent field is OK.  */
1581         j       .Ldiv_rounded
1582
1583 .Ldiv_flush_to_zero:
1584         /* Return zero with the appropriate sign bit.  */
1585         srli    xh, a7, 31
1586         slli    xh, xh, 31
1587         movi    xl, 0
1588         leaf_return
1589
1590 #endif /* XCHAL_HAVE_DFP_DIV */
1591
1592 #endif /* L_divdf3 */
1593
1594 #ifdef L_cmpdf2
1595
1596         /* Equal and Not Equal.  __eqdf2 (also exported as __nedf2) returns
                0 in a2 when x == y and 1 otherwise.  +0 and -0 compare equal;
                a NaN compares unequal to everything, including itself.  */
1597
1598         .align  4
1599         .global __eqdf2
1600         .global __nedf2
1601         .set    __nedf2, __eqdf2
1602         .type   __eqdf2, @function
1603 __eqdf2:
1604         leaf_entry sp, 16
1605         bne     xl, yl, 2f                 /* low words differ: not equal */
1606         bne     xh, yh, 4f                 /* only the high words differ: maybe +0 vs -0 */
1607
1608         /* The values are equal but NaN != NaN.  Check the exponent.  */
1609         movi    a6, 0x7ff00000
1610         ball    xh, a6, 3f
1611
1612         /* Equal.  */
1613         movi    a2, 0
1614         leaf_return
1615
1616         /* Not equal.  */
1617 2:      movi    a2, 1
1618         leaf_return
1619
1620         /* Check if the mantissas are nonzero.  */
1621 3:      slli    a7, xh, 12                 /* drop the sign and exponent bits */
1622         or      a7, a7, xl
1623         j       5f
1624
1625         /* Check if x and y are zero with different signs.  */
1626 4:      or      a7, xh, yh
1627         slli    a7, a7, 1                  /* drop the sign bits */
1628         or      a7, a7, xl      /* xl == yl here */
1629
1630         /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1631            of x when exponent(x) = 0x7ff and x == y.  */
1632 5:      movi    a2, 0
1633         movi    a3, 1
1634         movnez  a2, a3, a7                 /* a2 = 1 when a7 != 0 */
1635         leaf_return
1636
1637
1638         /* Greater Than.  __gtdf2 returns 1 in a2 when x > y and 0
                otherwise.  If either operand is a NaN the result is 0.  The
                ordered comparison itself is done by .Lle_cmp.  */
1639
1640         .align  4
1641         .global __gtdf2
1642         .type   __gtdf2, @function
1643 __gtdf2:
1644         leaf_entry sp, 16
1645         movi    a6, 0x7ff00000
1646         ball    xh, a6, 2f                 /* x has an all-ones exponent: NaN or Inf */
1647 1:      bnall   yh, a6, .Lle_cmp           /* y is finite: compare normally */
1648
1649         /* Check if y is a NaN.  */
1650         slli    a7, yh, 12
1651         or      a7, a7, yl
1652         beqz    a7, .Lle_cmp               /* zero mantissa: y is Infinity */
1653         movi    a2, 0
1654         leaf_return
1655
1656         /* Check if x is a NaN.  */
1657 2:      slli    a7, xh, 12
1658         or      a7, a7, xl
1659         beqz    a7, 1b                     /* zero mantissa: x is Infinity, go check y */
1660         movi    a2, 0
1661         leaf_return
1662
1663
1664         /* Less Than or Equal.  __ledf2 returns 0 in a2 when x <= y and 1
                otherwise.  If either operand is a NaN the result is 1.
                .Lle_cmp below is the ordered comparison shared with __gtdf2;
                it is entered only when neither operand is a NaN.  */
1665
1666         .align  4
1667         .global __ledf2
1668         .type   __ledf2, @function
1669 __ledf2:
1670         leaf_entry sp, 16
1671         movi    a6, 0x7ff00000
1672         ball    xh, a6, 2f                 /* x has an all-ones exponent: NaN or Inf */
1673 1:      bnall   yh, a6, .Lle_cmp           /* y is finite: compare normally */
1674
1675         /* Check if y is a NaN.  */
1676         slli    a7, yh, 12
1677         or      a7, a7, yl
1678         beqz    a7, .Lle_cmp               /* zero mantissa: y is Infinity */
1679         movi    a2, 1
1680         leaf_return
1681
1682         /* Check if x is a NaN.  */
1683 2:      slli    a7, xh, 12
1684         or      a7, a7, xl
1685         beqz    a7, 1b                     /* zero mantissa: x is Infinity, go check y */
1686         movi    a2, 1
1687         leaf_return
1688
1689 .Lle_cmp:
1690         /* Check if x and y have different signs.  */
1691         xor     a7, xh, yh
1692         bltz    a7, .Lle_diff_signs
1693
1694         /* Check if x is negative.  */
1695         bltz    xh, .Lle_xneg
1696
1697         /* Check if x <= y.  */
             /* Both operands are non-negative, so the bit patterns order the
                same way as the values.  */
1698         bltu    xh, yh, 4f
1699         bne     xh, yh, 5f
1700         bltu    yl, xl, 5f
1701 4:      movi    a2, 0                      /* x <= y */
1702         leaf_return
1703
1704 .Lle_xneg:
1705         /* Check if y <= x.  */
             /* Both operands are negative, so the larger bit pattern is the
                smaller value.  */
1706         bltu    yh, xh, 4b
1707         bne     yh, xh, 5f
1708         bgeu    xl, yl, 4b
1709 5:      movi    a2, 1                      /* x > y */
1710         leaf_return
1711
1712 .Lle_diff_signs:
1713         bltz    xh, 4b                     /* x negative, y non-negative: x <= y */
1714
1715         /* Check if both x and y are zero.  */
1716         or      a7, xh, yh
1717         slli    a7, a7, 1                  /* drop the sign bits */
1718         or      a7, a7, xl
1719         or      a7, a7, yl
1720         movi    a2, 1
1721         movi    a3, 0
1722         moveqz  a2, a3, a7                 /* +0 vs -0 counts as equal: a2 = 0 */
1723         leaf_return
1724
1725
1726         /* Greater Than or Equal */
1727
1728         .align  4
1729         .global __gedf2
1730         .type   __gedf2, @function
             /* __gedf2 returns 0 in a2 when x >= y and -1 otherwise.  If
                either operand is a NaN the result is -1.  The ordered
                comparison itself is done by .Llt_cmp.  */
1731 __gedf2:
1732         leaf_entry sp, 16
1733         movi    a6, 0x7ff00000
1734         ball    xh, a6, 2f                 /* x has an all-ones exponent: NaN or Inf */
1735 1:      bnall   yh, a6, .Llt_cmp           /* y is finite: compare normally */
1736
1737         /* Check if y is a NaN.  */
1738         slli    a7, yh, 12
1739         or      a7, a7, yl
1740         beqz    a7, .Llt_cmp               /* zero mantissa: y is Infinity */
1741         movi    a2, -1
1742         leaf_return
1743
1744         /* Check if x is a NaN.  */
1745 2:      slli    a7, xh, 12
1746         or      a7, a7, xl
1747         beqz    a7, 1b                     /* zero mantissa: x is Infinity, go check y */
1748         movi    a2, -1
1749         leaf_return
1750
1751
1752         /* Less Than.  __ltdf2 returns -1 in a2 when x < y and 0
                otherwise.  If either operand is a NaN the result is 0.
                .Llt_cmp below is the ordered comparison shared with __gedf2;
                it is entered only when neither operand is a NaN.  */
1753
1754         .align  4
1755         .global __ltdf2
1756         .type   __ltdf2, @function
1757 __ltdf2:
1758         leaf_entry sp, 16
1759         movi    a6, 0x7ff00000
1760         ball    xh, a6, 2f                 /* x has an all-ones exponent: NaN or Inf */
1761 1:      bnall   yh, a6, .Llt_cmp           /* y is finite: compare normally */
1762
1763         /* Check if y is a NaN.  */
1764         slli    a7, yh, 12
1765         or      a7, a7, yl
1766         beqz    a7, .Llt_cmp               /* zero mantissa: y is Infinity */
1767         movi    a2, 0
1768         leaf_return
1769
1770         /* Check if x is a NaN.  */
1771 2:      slli    a7, xh, 12
1772         or      a7, a7, xl
1773         beqz    a7, 1b                     /* zero mantissa: x is Infinity, go check y */
1774         movi    a2, 0
1775         leaf_return
1776
1777 .Llt_cmp:
1778         /* Check if x and y have different signs.  */
1779         xor     a7, xh, yh
1780         bltz    a7, .Llt_diff_signs
1781
1782         /* Check if x is negative.  */
1783         bltz    xh, .Llt_xneg
1784
1785         /* Check if x < y.  */
             /* Both operands are non-negative, so the bit patterns order the
                same way as the values.  */
1786         bltu    xh, yh, 4f
1787         bne     xh, yh, 5f
1788         bgeu    xl, yl, 5f
1789 4:      movi    a2, -1                     /* x < y */
1790         leaf_return
1791
1792 .Llt_xneg:
1793         /* Check if y < x.  */
             /* Both operands are negative, so the larger bit pattern is the
                smaller value.  */
1794         bltu    yh, xh, 4b
1795         bne     yh, xh, 5f
1796         bltu    yl, xl, 4b
1797 5:      movi    a2, 0                      /* x >= y */
1798         leaf_return
1799
1800 .Llt_diff_signs:
1801         bgez    xh, 5b                     /* x non-negative, y negative: x >= y */
1802
1803         /* Check if both x and y are nonzero.  */
1804         or      a7, xh, yh
1805         slli    a7, a7, 1                  /* drop the sign bits */
1806         or      a7, a7, xl
1807         or      a7, a7, yl
1808         movi    a2, 0
1809         movi    a3, -1
1810         movnez  a2, a3, a7                 /* x < y unless this is -0 vs +0 */
1811         leaf_return
1812
1813
1814         /* Unordered.  __unorddf2 returns 1 in a2 when x or y is a NaN
                and 0 otherwise.  */
1815
1816         .align  4
1817         .global __unorddf2
1818         .type   __unorddf2, @function
1819 __unorddf2:
1820         leaf_entry sp, 16
1821         movi    a6, 0x7ff00000
1822         ball    xh, a6, 3f                 /* x has an all-ones exponent: NaN or Inf */
1823 1:      ball    yh, a6, 4f                 /* y has an all-ones exponent: NaN or Inf */
1824 2:      movi    a2, 0
1825         leaf_return
1826
             /* x is a NaN only if its mantissa is nonzero; otherwise it is
                Infinity and y still has to be checked.  */
1827 3:      slli    a7, xh, 12
1828         or      a7, a7, xl
1829         beqz    a7, 1b
1830         movi    a2, 1
1831         leaf_return
1832
             /* Same test for y.  */
1833 4:      slli    a7, yh, 12
1834         or      a7, a7, yl
1835         beqz    a7, 2b
1836         movi    a2, 1
1837         leaf_return
1838
1839 #endif /* L_cmpdf2 */
1840
1841 #ifdef L_fixdfsi
1842
1843         .align  4
1844         .global __fixdfsi
1845         .type   __fixdfsi, @function
             /* __fixdfsi converts the double in xh/xl to a signed 32-bit
                integer in a2.  Fraction bits are shifted out, so the result
                is truncated toward zero.  Inputs whose biased exponent is
                0x3fe or less give 0.  Inputs whose exponent is too large, and
                infinities, give 0x7fffffff or 0x80000000 according to the
                sign; a NaN gives 0x7fffffff.  */
1846 __fixdfsi:
1847         leaf_entry sp, 16
1848
1849         /* Check for NaN and Infinity.  */
1850         movi    a6, 0x7ff00000
1851         ball    xh, a6, .Lfixdfsi_nan_or_inf
1852
1853         /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
1854         extui   a4, xh, 20, 11
1855         extui   a5, a6, 19, 10  /* 0x3fe */
1856         sub     a4, a4, a5
1857         bgei    a4, 32, .Lfixdfsi_maxint
1858         blti    a4, 1, .Lfixdfsi_zero
1859
1860         /* Add explicit "1.0" and shift << 11.  */
1861         or      a7, xh, a6                 /* sets bit 20; the sign bit of xh is kept */
1862         ssai    (32 - 11)
1863         src     a5, a7, xl                 /* the exponent bits fall off the top */
1864
1865         /* Shift back to the right, based on the exponent.  */
1866         ssl     a4              /* shift by 32 - a4 */
1867         srl     a5, a5
1868
1869         /* Negate the result if sign != 0.  */
1870         neg     a2, a5
1871         movgez  a2, a5, a7                 /* a7 >= 0: keep the positive value */
1872         leaf_return
1873
1874 .Lfixdfsi_nan_or_inf:
1875         /* Handle Infinity and NaN.  */
1876         slli    a4, xh, 12
1877         or      a4, a4, xl
1878         beqz    a4, .Lfixdfsi_maxint       /* zero mantissa: Infinity */
1879
1880         /* Translate NaN to +maxint.  */
1881         movi    xh, 0
1882
1883 .Lfixdfsi_maxint:
             /* Saturate: the sign of xh selects the limit.  */
1884         slli    a4, a6, 11      /* 0x80000000 */
1885         addi    a5, a4, -1      /* 0x7fffffff */
1886         movgez  a4, a5, xh
1887         mov     a2, a4
1888         leaf_return
1889
1890 .Lfixdfsi_zero:
1891         movi    a2, 0
1892         leaf_return
1893
1894 #endif /* L_fixdfsi */
1895
1896 #ifdef L_fixdfdi
1897
1898         .align  4
1899         .global __fixdfdi
1900         .type   __fixdfdi, @function
             /* __fixdfdi converts the double in xh/xl to a signed 64-bit
                integer returned in xh/xl.  Fraction bits are shifted out, so
                the result is truncated toward zero.  Inputs whose biased
                exponent is 0x3fe or less give 0.  Inputs whose exponent is
                too large, and infinities, give 0x7fffffff:0xffffffff or
                0x80000000:0 according to the sign; a NaN gives the positive
                limit.  */
1901 __fixdfdi:
1902         leaf_entry sp, 16
1903
1904         /* Check for NaN and Infinity.  */
1905         movi    a6, 0x7ff00000
1906         ball    xh, a6, .Lfixdfdi_nan_or_inf
1907
1908         /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
1909         extui   a4, xh, 20, 11
1910         extui   a5, a6, 19, 10  /* 0x3fe */
1911         sub     a4, a4, a5
1912         bgei    a4, 64, .Lfixdfdi_maxint
1913         blti    a4, 1, .Lfixdfdi_zero
1914
1915         /* Add explicit "1.0" and shift << 11.  */
1916         or      a7, xh, a6                 /* sets bit 20; the sign bit of xh is kept */
1917         ssai    (32 - 11)
1918         src     xh, a7, xl                 /* the exponent bits fall off the top */
1919         sll     xl, xl
1920
1921         /* Shift back to the right, based on the exponent.  */
1922         ssl     a4              /* shift by 64 - a4 */
1923         bgei    a4, 32, .Lfixdfdi_smallshift
             /* The result fits in the low word only.  */
1924         srl     xl, xh
1925         movi    xh, 0
1926
1927 .Lfixdfdi_shifted:
1928         /* Negate the result if sign != 0.  */
1929         bgez    a7, 1f
1930         neg     xl, xl
1931         neg     xh, xh
1932         beqz    xl, 1f
1933         addi    xh, xh, -1                 /* borrow when the low word is nonzero */
1934 1:      leaf_return
1935
1936 .Lfixdfdi_smallshift:
             /* The result needs both words: shift xh/xl right as a pair.  */
1937         src     xl, xh, xl
1938         srl     xh, xh
1939         j       .Lfixdfdi_shifted
1940
1941 .Lfixdfdi_nan_or_inf:
1942         /* Handle Infinity and NaN.  */
1943         slli    a4, xh, 12
1944         or      a4, a4, xl
1945         beqz    a4, .Lfixdfdi_maxint       /* zero mantissa: Infinity */
1946
1947         /* Translate NaN to +maxint.  */
1948         movi    xh, 0
1949
1950 .Lfixdfdi_maxint:
             /* Saturate: the sign of xh selects the limit.  */
1951         slli    a7, a6, 11      /* 0x80000000 */
1952         bgez    xh, 1f
1953         mov     xh, a7
1954         movi    xl, 0
1955         leaf_return
1956
1957 1:      addi    xh, a7, -1      /* 0x7fffffff */
1958         movi    xl, -1
1959         leaf_return
1960
1961 .Lfixdfdi_zero:
1962         movi    xh, 0
1963         movi    xl, 0
1964         leaf_return
1965
1966 #endif /* L_fixdfdi */
1967
1968 #ifdef L_fixunsdfsi
1969         /* __fixunsdfsi: double in xh/xl -> 32-bit unsigned integer in a2, truncating toward zero.  */
1970         .align  4
1971         .global __fixunsdfsi
1972         .type   __fixunsdfsi, @function
1973 __fixunsdfsi:
1974         leaf_entry sp, 16
1975         /* NaN and positive overflow give 0xffffffff; negative overflow gives 0x80000000.  */
1976         /* Check for NaN and Infinity.  */
1977         movi    a6, 0x7ff00000  /* exponent field mask of the high word */
1978         ball    xh, a6, .Lfixunsdfsi_nan_or_inf /* taken when every exponent bit is set */
1979
1980         /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
1981         extui   a4, xh, 20, 11  /* a4 = biased exponent */
1982         extui   a5, a6, 20, 10  /* 0x3ff */
1983         sub     a4, a4, a5      /* a4 = unbiased exponent */
1984         bgei    a4, 32, .Lfixunsdfsi_maxint     /* |x| >= 2^32: saturate */
1985         bltz    a4, .Lfixunsdfsi_zero           /* |x| < 1 truncates to 0 */
1986
1987         /* Add explicit "1.0" and shift << 11.  */
1988         or      a7, xh, a6      /* sets bit 20 (the implicit 1); a7 keeps the sign of x */
1989         ssai    (32 - 11)       /* SAR = 21: funnel shift right by 21 == left by 11 */
1990         src     a5, a7, xl      /* a5 = top 32 mantissa bits, leading 1 in bit 31 */
1991
1992         /* Shift back to the right, based on the exponent.  */
1993         addi    a4, a4, 1       /* a4 = number of integer bits (1..32) */
1994         beqi    a4, 32, .Lfixunsdfsi_bigexp     /* all 32 bits are integer bits */
1995         ssl     a4              /* shift by 32 - a4 */
1996         srl     a5, a5
1997
1998         /* Negate the result if sign != 0.  */
1999         neg     a2, a5
2000         movgez  a2, a5, a7      /* keep the un-negated value when a7 (sign of x) is >= 0 */
2001         leaf_return
2002
2003 .Lfixunsdfsi_nan_or_inf:
2004         /* Handle Infinity and NaN.  */
2005         slli    a4, xh, 12      /* discard sign and exponent bits */
2006         or      a4, a4, xl      /* a4 == 0 only when the whole mantissa is 0 */
2007         beqz    a4, .Lfixunsdfsi_maxint /* Infinity: saturate according to the sign */
2008
2009         /* Translate NaN to 0xffffffff.  */
2010         movi    a2, -1
2011         leaf_return
2012
2013 .Lfixunsdfsi_maxint:
2014         slli    a4, a6, 11      /* 0x80000000 */
2015         movi    a5, -1          /* 0xffffffff */
2016         movgez  a4, a5, xh      /* pick 0xffffffff when the sign bit of xh is clear */
2017         mov     a2, a4
2018         leaf_return
2019
2020 .Lfixunsdfsi_zero:
2021         movi    a2, 0
2022         leaf_return
2023
2024 .Lfixunsdfsi_bigexp:
2025         /* Handle unsigned maximum exponent case.  */
2026         bltz    xh, 1f
2027         mov     a2, a5          /* no shift needed */
2028         leaf_return
2029
2030         /* Return 0x80000000 if negative.  */
2031 1:      slli    a2, a6, 11      /* 0x7ff00000 << 11 == 0x80000000 */
2032         leaf_return
2033
2034 #endif /* L_fixunsdfsi */
2035
2036 #ifdef L_fixunsdfdi
2037         /* __fixunsdfdi: double in xh/xl -> 64-bit unsigned integer in xh/xl, truncating toward zero.  */
2038         .align  4
2039         .global __fixunsdfdi
2040         .type   __fixunsdfdi, @function
2041 __fixunsdfdi:
2042         leaf_entry sp, 16
2043         /* NaN and positive overflow give all ones; negative overflow gives 0x80000000:00000000.  */
2044         /* Check for NaN and Infinity.  */
2045         movi    a6, 0x7ff00000  /* exponent field mask of the high word */
2046         ball    xh, a6, .Lfixunsdfdi_nan_or_inf /* taken when every exponent bit is set */
2047
2048         /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
2049         extui   a4, xh, 20, 11  /* a4 = biased exponent */
2050         extui   a5, a6, 20, 10  /* 0x3ff */
2051         sub     a4, a4, a5      /* a4 = unbiased exponent */
2052         bgei    a4, 64, .Lfixunsdfdi_maxint     /* |x| >= 2^64: saturate */
2053         bltz    a4, .Lfixunsdfdi_zero           /* |x| < 1 truncates to 0 */
2054
2055         /* Add explicit "1.0" and shift << 11.  */
2056         or      a7, xh, a6      /* sets bit 20 (the implicit 1); a7 keeps the sign of x */
2057         ssai    (32 - 11)       /* SAR = 21: funnel shift right by 21 == left by 11 */
2058         src     xh, a7, xl      /* xh = high word of (a7:xl) << 11, leading 1 now in bit 31 */
2059         sll     xl, xl          /* xl <<= 11 (sll shifts by 32 - SAR) */
2060
2061         /* Shift back to the right, based on the exponent.  */
2062         addi    a4, a4, 1       /* a4 = number of integer bits (1..64) */
2063         beqi    a4, 64, .Lfixunsdfdi_bigexp     /* all 64 bits are integer bits */
2064         ssl     a4              /* shift by 64 - a4 */
2065         bgei    a4, 32, .Lfixunsdfdi_smallshift /* shift count <= 32: both words contribute */
2066         srl     xl, xh          /* shift count > 32: result fits in the low word */
2067         movi    xh, 0
2068
2069 .Lfixunsdfdi_shifted:
2070         /* Negate the result if sign != 0.  */
2071         bgez    a7, 1f
2072         neg     xl, xl          /* 64-bit two's-complement negate of xh:xl */
2073         neg     xh, xh
2074         beqz    xl, 1f          /* low word was 0: nothing to borrow */
2075         addi    xh, xh, -1      /* otherwise borrow one from the high word */
2076 1:      leaf_return
2077
2078 .Lfixunsdfdi_smallshift:
2079         src     xl, xh, xl      /* xl = low word of (xh:xl) >> SAR */
2080         srl     xh, xh          /* xh = high word of the same shift */
2081         j       .Lfixunsdfdi_shifted
2082
2083 .Lfixunsdfdi_nan_or_inf:
2084         /* Handle Infinity and NaN.  */
2085         slli    a4, xh, 12      /* discard sign and exponent bits */
2086         or      a4, a4, xl      /* a4 == 0 only when the whole mantissa is 0 */
2087         beqz    a4, .Lfixunsdfdi_maxint /* Infinity: saturate according to the sign */
2088
2089         /* Translate NaN to 0xffffffff.... */
2090 1:      movi    xh, -1          /* also the target for positive overflow (see 1b below) */
2091         movi    xl, -1
2092         leaf_return
2093
2094 .Lfixunsdfdi_maxint:
2095         bgez    xh, 1b          /* positive: return all ones */
2096 2:      slli    xh, a6, 11      /* 0x80000000 */
2097         movi    xl, 0           /* negative: xh:xl = 0x80000000:00000000 */
2098         leaf_return
2099
2100 .Lfixunsdfdi_zero:
2101         movi    xh, 0
2102         movi    xl, 0
2103         leaf_return
2104
2105 .Lfixunsdfdi_bigexp:
2106         /* Handle unsigned maximum exponent case.  */
2107         bltz    a7, 2b          /* negative input: return 0x80000000:00000000 */
2108         leaf_return             /* no shift needed */
2109
2110 #endif /* L_fixunsdfdi */
2111
2112 #ifdef L_floatsidf
2113         /* __floatunsidf: 32-bit unsigned integer in a2 -> double in xh/xl; shares its body with __floatsidf.  */
2114         .align  4
2115         .global __floatunsidf
2116         .type   __floatunsidf, @function
2117 __floatunsidf:
2118         leaf_entry sp, 16
2119         beqz    a2, .Lfloatsidf_return_zero     /* zero input */
2120
2121         /* Set the sign to zero and jump to the floatsidf code.  */
2122         movi    a7, 0
2123         j       .Lfloatsidf_normalize
2124         /* __floatsidf: 32-bit signed integer in a2 -> double in xh/xl; no rounding step is performed.  */
2125         .align  4
2126         .global __floatsidf
2127         .type   __floatsidf, @function
2128 __floatsidf:
2129         leaf_entry sp, 16
2130
2131         /* Check for zero.  */
2132         beqz    a2, .Lfloatsidf_return_zero
2133
2134         /* Save the sign.  */
2135         extui   a7, a2, 31, 1   /* a7 = 1 for a negative input, else 0 */
2136
2137         /* Get the absolute value.  */
2138 #if XCHAL_HAVE_ABS
2139         abs     a2, a2
2140 #else
2141         neg     a4, a2
2142         movltz  a2, a4, a2      /* a2 = -a2 only when a2 < 0 */
2143 #endif
2144
2145 .Lfloatsidf_normalize:
2146         /* Normalize with the first 1 bit in the msb.  */
2147         do_nsau a4, a2, a5, a6  /* macro defined elsewhere; used as a4 = shift needed to bring the top 1 bit to bit 31 (a5/a6 presumably scratch; confirm) */
2148         ssl     a4              /* set up a left shift by a4 */
2149         sll     a5, a2          /* a5 = magnitude << a4 */
2150
2151         /* Shift the mantissa into position.  */
2152         srli    xh, a5, 11      /* leading 1 lands in bit 20 of the high word */
2153         slli    xl, a5, (32 - 11)       /* low 11 bits move to the top of the low word */
2154
2155         /* Set the exponent.  */
2156         movi    a5, 0x41d       /* 0x3fe + 31 */
2157         sub     a5, a5, a4
2158         slli    a5, a5, 20
2159         add     xh, xh, a5      /* the leading 1 in bit 20 adds one more to the exponent field */
2160
2161         /* Add the sign and return. */
2162         slli    a7, a7, 31
2163         or      xh, xh, a7
2164         leaf_return
2165
2166 .Lfloatsidf_return_zero:
2167         movi    a3, 0           /* a2 is already 0 here; clear the other result word */
2168         leaf_return
2169
2170 #endif /* L_floatsidf */
2171
2172 #ifdef L_floatdidf
2173         /* __floatundidf: 64-bit unsigned integer in xh/xl -> double in xh/xl; shares its body with __floatdidf.  */
2174         .align  4
2175         .global __floatundidf
2176         .type   __floatundidf, @function
2177 __floatundidf:
2178         leaf_entry sp, 16
2179
2180         /* Check for zero.  */
2181         or      a4, xh, xl
2182         beqz    a4, 2f          /* zero input: xh/xl are returned unchanged (all zero bits) */
2183
2184         /* Set the sign to zero and jump to the floatdidf code.  */
2185         movi    a7, 0
2186         j       .Lfloatdidf_normalize
2187         /* __floatdidf: 64-bit signed integer in xh/xl -> double in xh/xl, rounding to nearest, ties to even.  */
2188         .align  4
2189         .global __floatdidf
2190         .type   __floatdidf, @function
2191 __floatdidf:
2192         leaf_entry sp, 16
2193
2194         /* Check for zero.  */
2195         or      a4, xh, xl
2196         beqz    a4, 2f          /* zero input: xh/xl are returned unchanged (all zero bits) */
2197
2198         /* Save the sign.  */
2199         extui   a7, xh, 31, 1   /* a7 = 1 for a negative input, else 0 */
2200
2201         /* Get the absolute value.  */
2202         bgez    xh, .Lfloatdidf_normalize
2203         neg     xl, xl          /* 64-bit two's-complement negate of xh:xl */
2204         neg     xh, xh
2205         beqz    xl, .Lfloatdidf_normalize       /* low word was 0: nothing to borrow */
2206         addi    xh, xh, -1      /* otherwise borrow one from the high word */
2207
2208 .Lfloatdidf_normalize:
2209         /* Normalize with the first 1 bit in the msb of xh.  */
2210         beqz    xh, .Lfloatdidf_bigshift
2211         do_nsau a4, xh, a5, a6  /* macro defined elsewhere; used as a4 = normalizing shift for xh (a5/a6 presumably scratch; confirm) */
2212         ssl     a4              /* set up a left shift by a4 */
2213         src     xh, xh, xl      /* xh:xl <<= a4, high word first */
2214         sll     xl, xl
2215
2216 .Lfloatdidf_shifted:
2217         /* Shift the mantissa into position, with rounding bits in a6.  */
2218         ssai    11
2219         sll     a6, xl          /* a6 = xl << 21: the 11 bits about to be dropped, msb-aligned */
2220         src     xl, xh, xl      /* xh:xl >>= 11, low word first */
2221         srl     xh, xh
2222
2223         /* Set the exponent.  */
2224         movi    a5, 0x43d       /* 0x3fe + 63 */
2225         sub     a5, a5, a4      /* a4 = total normalizing shift */
2226         slli    a5, a5, 20
2227         add     xh, xh, a5      /* the leading 1 in bit 20 adds one more to the exponent field */
2228
2229         /* Add the sign.  */
2230         slli    a7, a7, 31
2231         or      xh, xh, a7
2232
2233         /* Round up if the leftover fraction is >= 1/2.  */
2234         bgez    a6, 2f          /* top leftover bit clear: fraction < 1/2, done */
2235         addi    xl, xl, 1
2236         beqz    xl, .Lfloatdidf_roundcarry      /* low word wrapped: carry into xh */
2237
2238         /* Check if the leftover fraction is exactly 1/2.  */
2239         slli    a6, a6, 1       /* drop the 1/2 bit; zero means no lower bits were set */
2240         beqz    a6, .Lfloatdidf_exactlyhalf
2241 2:      leaf_return
2242
2243 .Lfloatdidf_bigshift:
2244         /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
2245         do_nsau a4, xl, a5, a6  /* same macro as above, applied to the low word */
2246         ssl     a4
2247         sll     xh, xl          /* xh = xl << a4 */
2248         movi    xl, 0
2249         addi    a4, a4, 32      /* account for the whole-word move in the shift count */
2250         j       .Lfloatdidf_shifted
2251
2252 .Lfloatdidf_exactlyhalf:
2253         /* Round down to the nearest even value.  */
2254         srli    xl, xl, 1       /* this shift pair clears bit 0 of xl */
2255         slli    xl, xl, 1
2256         leaf_return
2257
2258 .Lfloatdidf_roundcarry:
2259         /* xl is always zero when the rounding increment overflows, so
2260            there's no need to round it to an even value.  */
2261         addi    xh, xh, 1
2262         /* Overflow to the exponent is OK.  */
2263         leaf_return
2264
2265 #endif /* L_floatdidf */
2266
2267 #ifdef L_truncdfsf2
2268         /* __truncdfsf2: double in xh/xl -> single-precision float in a2, rounding to nearest, ties to even.  */
2269         .align  4
2270         .global __truncdfsf2
2271         .type   __truncdfsf2, @function
2272 __truncdfsf2:
2273         leaf_entry sp, 16
2274         /* Too-large values become Infinity, NaN becomes a quiet NaN, tiny values become subnormals or signed zero.  */
2275         /* Adjust the exponent bias.  */
2276         movi    a4, (0x3ff - 0x7f) << 20
2277         sub     a5, xh, a4      /* a5 = high word with the exponent re-biased for single precision */
2278
2279         /* Check for underflow.  */
2280         xor     a6, xh, a5
2281         bltz    a6, .Ltrunc_underflow   /* the subtraction borrowed through the sign bit */
2282         extui   a6, a5, 20, 11  /* a6 = re-biased exponent */
2283         beqz    a6, .Ltrunc_underflow   /* exponent 0 is not a normal single */
2284
2285         /* Check for overflow.  */
2286         movi    a4, 255
2287         bge     a6, a4, .Ltrunc_overflow        /* also catches Infinity and NaN inputs */
2288
2289         /* Shift a5/xl << 3 into a5/a4.  */
2290         ssai    (32 - 3)        /* SAR = 29: funnel shift right by 29 == left by 3 */
2291         src     a5, a5, xl      /* a5 = exponent and mantissa in single-precision position */
2292         sll     a4, xl          /* a4 = leftover low mantissa bits, msb-aligned */
2293
2294 .Ltrunc_addsign:
2295         /* Add the sign bit.  */
2296         extui   a6, xh, 31, 1
2297         slli    a6, a6, 31
2298         or      a2, a6, a5
2299
2300         /* Round up if the leftover fraction is >= 1/2.  */
2301         bgez    a4, 1f          /* top leftover bit clear: fraction < 1/2, done */
2302         addi    a2, a2, 1
2303         /* Overflow to the exponent is OK.  The answer will be correct.  */
2304
2305         /* Check if the leftover fraction is exactly 1/2.  */
2306         slli    a4, a4, 1       /* drop the 1/2 bit; zero means no lower bits were set */
2307         beqz    a4, .Ltrunc_exactlyhalf
2308 1:      leaf_return
2309
2310 .Ltrunc_exactlyhalf:
2311         /* Round down to the nearest even value.  */
2312         srli    a2, a2, 1       /* this shift pair clears bit 0 of a2 */
2313         slli    a2, a2, 1
2314         leaf_return
2315
2316 .Ltrunc_overflow:
2317         /* Check if exponent == 0x7ff.  */
2318         movi    a4, 0x7ff00000
2319         bnall   xh, a4, 1f      /* finite but too large: return Infinity */
2320
2321         /* Check if mantissa is nonzero.  */
2322         slli    a5, xh, 12      /* discard sign and exponent bits */
2323         or      a5, a5, xl
2324         beqz    a5, 1f          /* mantissa is 0: the input is Infinity */
2325
2326         /* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
2327         srli    a4, a4, 1
2328
2329 1:      slli    a4, a4, 4       /* 0xff000000 or 0xff800000 */
2330         /* Add the sign bit.  */
2331         extui   a6, xh, 31, 1
2332         ssai    1
2333         src     a2, a6, a4      /* (a6:a4) >> 1: sign in bit 31 above 0x7f800000 or 0x7fc00000 */
2334         leaf_return
2335
2336 .Ltrunc_underflow:
2337         /* Find shift count for a subnormal.  Flush to zero if >= 32.  */
2338         extui   a6, xh, 20, 11  /* a6 = original biased exponent */
2339         movi    a5, 0x3ff - 0x7f
2340         sub     a6, a5, a6
2341         addi    a6, a6, 1       /* a6 = right-shift count for the subnormal mantissa */
2342         bgeui   a6, 32, 1f
2343
2344         /* Replace the exponent with an explicit "1.0".  */
2345         slli    a5, a5, 13      /* 0x700000 */
2346         or      a5, a5, xh      /* guarantees bit 20 is set */
2347         slli    a5, a5, 11      /* this shift pair clears the top 11 bits, keeping bits 20..0 */
2348         srli    a5, a5, 11
2349
2350         /* Shift the mantissa left by 3 bits (into a5/a4).  */
2351         ssai    (32 - 3)
2352         src     a5, a5, xl
2353         sll     a4, xl
2354
2355         /* Shift right by a6.  */
2356         ssr     a6
2357         sll     a7, a4          /* a7 = bits shifted out below a4 */
2358         src     a4, a5, a4      /* a4 = leftover fraction after the shift */
2359         srl     a5, a5          /* a5 = subnormal mantissa */
2360         beqz    a7, .Ltrunc_addsign     /* nothing lost below a4: round on a4 alone */
2361         or      a4, a4, a6      /* any positive, nonzero value will work */
2362         j       .Ltrunc_addsign
2363
2364         /* Return +/- zero.  */
2365 1:      extui   a2, xh, 31, 1   /* keep only the sign bit */
2366         slli    a2, a2, 31
2367         leaf_return
2368
2369 #endif /* L_truncdfsf2 */
2370
2371 #ifdef L_extendsfdf2
2372         /* __extendsfdf2: single-precision float in a2 -> double in xh/xl.  */
2373         .align  4
2374         .global __extendsfdf2
2375         .type   __extendsfdf2, @function
2376 __extendsfdf2:
2377         leaf_entry sp, 16
2378         /* Subnormal inputs are normalized; any NaN input returns the fixed pattern 0x7ff80000:00000000 plus the sign.  */
2379         /* Save the sign bit and then shift it off.  */
2380         extui   a5, a2, 31, 1
2381         slli    a5, a5, 31      /* a5 = sign bit in its final position */
2382         slli    a4, a2, 1       /* a4 = exponent and mantissa, sign removed */
2383
2384         /* Extract and check the exponent.  */
2385         extui   a6, a2, 23, 8   /* a6 = biased single-precision exponent */
2386         beqz    a6, .Lextend_expzero    /* zero or subnormal */
2387         addi    a6, a6, 1
2388         beqi    a6, 256, .Lextend_nan_or_inf    /* exponent was 255 */
2389
2390         /* Shift >> 3 into a4/xl.  */
2391         srli    a4, a4, 4       /* net effect on the sign-stripped input: >> 3 */
2392         slli    xl, a2, (32 - 3)        /* low 3 mantissa bits go to the top of the low word */
2393
2394         /* Adjust the exponent bias.  */
2395         movi    a6, (0x3ff - 0x7f) << 20
2396         add     a4, a4, a6
2397
2398         /* Add the sign bit.  */
2399         or      xh, a4, a5
2400         leaf_return
2401
2402 .Lextend_nan_or_inf:
2403         movi    a4, 0x7ff00000  /* double-precision exponent field, all ones */
2404
2405         /* Check for NaN.  */
2406         slli    a7, a2, 9       /* a7 = mantissa bits only */
2407         beqz    a7, 1f          /* mantissa is 0: Infinity */
2408
2409         slli    a6, a6, 11      /* 0x80000 */
2410         or      a4, a4, a6      /* set the top mantissa bit (bit 19) for the NaN result */
2411
2412         /* Add the sign and return.  */
2413 1:      or      xh, a4, a5
2414         movi    xl, 0
2415         leaf_return
2416
2417 .Lextend_expzero:
2418         beqz    a4, 1b          /* +/- 0: a4 is 0, so the code at 1b returns just the sign */
2419
2420         /* Normalize it to have 8 zero bits before the first 1 bit.  */
2421         do_nsau a7, a4, a2, a3  /* macro defined elsewhere; used as a7 = normalizing shift for a4 (a2/a3 presumably scratch; confirm) */
2422         addi    a7, a7, -8
2423         ssl     a7              /* set up a left shift by a7 */
2424         sll     a4, a4
2425
2426         /* Shift >> 3 into a4/xl.  */
2427         slli    xl, a4, (32 - 3)
2428         srli    a4, a4, 3
2429
2430         /* Set the exponent.  */
2431         movi    a6, 0x3fe - 0x7f
2432         sub     a6, a6, a7      /* a larger normalizing shift gives a smaller exponent */
2433         slli    a6, a6, 20
2434         add     a4, a4, a6
2435
2436         /* Add the sign and return.  */
2437         or      xh, a4, a5
2438         leaf_return
2439
2440 #endif /* L_extendsfdf2 */
2441
2442
2443 #if XCHAL_HAVE_DFP_SQRT
2444 #ifdef L_sqrt
2445         /* __ieee754_sqrt: built only when XCHAL_HAVE_DFP_SQRT is set; takes a double in xh/xl and returns one in xh/xl.  */
2446         .text
2447         .align 4
2448         .global __ieee754_sqrt
2449         .type   __ieee754_sqrt, @function
2450 __ieee754_sqrt:
2451         leaf_entry      sp, 16
2452         /* NOTE(review): presumably a square-root seed (sqrt0.d) plus multiply-add refinement; semantics not visible here, confirm with the Xtensa ISA reference.  */
2453         wfrd            f1, xh, xl      /* f1 = input operand, loaded from the xh/xl pair */
2454         /* NOTE(review): the const.d operand may be a constant-table index rather than a literal value; confirm before editing.  */
2455         sqrt0.d         f2, f1
2456         const.d         f4, 0
2457         maddn.d         f4, f2, f2
2458         nexp01.d        f3, f1
2459         const.d         f0, 3
2460         addexp.d        f3, f0
2461         maddn.d         f0, f4, f3
2462         nexp01.d        f4, f1
2463         maddn.d         f2, f0, f2
2464         const.d         f5, 0
2465         maddn.d         f5, f2, f3
2466         const.d         f0, 3
2467         maddn.d         f0, f5, f2
2468         neg.d           f6, f4
2469         maddn.d         f2, f0, f2
2470         const.d         f0, 0
2471         const.d         f5, 0
2472         const.d         f7, 0
2473         maddn.d         f0, f6, f2
2474         maddn.d         f5, f2, f3
2475         const.d         f3, 3
2476         maddn.d         f7, f3, f2
2477         maddn.d         f4, f0, f0
2478         maddn.d         f3, f5, f2
2479         neg.d           f2, f7
2480         maddn.d         f0, f4, f2
2481         maddn.d         f7, f3, f7
2482         mksadj.d        f2, f1
2483         nexp01.d        f1, f1
2484         maddn.d         f1, f0, f0
2485         neg.d           f3, f7
2486         addexpm.d       f0, f2
2487         addexp.d        f3, f2
2488         divn.d          f0, f1, f3      /* last instruction of the sequence; leaves the result in f0 */
2489         /* Copy the result in f0 back to the integer register pair.  */
2490         rfr             xl, f0          /* xl receives one half of f0 (presumably the low word) */
2491         rfrd            xh, f0          /* xh receives the other half (presumably the high word) */
2492
2493         leaf_return
2494
2495 #endif /* L_sqrt */
2496 #endif /* XCHAL_HAVE_DFP_SQRT */
2497
2498 #if XCHAL_HAVE_DFP_RECIP
2499 #ifdef L_recipdf2
2500         /* Reciprocal: built only when XCHAL_HAVE_DFP_RECIP is set; takes a double in xh/xl and returns one in xh/xl.  */
2501         /* NOTE(review): presumably a reciprocal seed (recip0.d) plus multiply-add refinement; semantics not visible here, confirm with the Xtensa ISA reference.  */
2502         .align  4
2503         .global __recipdf2
2504         .type   __recipdf2, @function
2505 __recipdf2:
2506         leaf_entry      sp, 16
2507         /* NOTE(review): the const.d operand may be a constant-table index rather than a literal value; confirm before editing.  */
2508         wfrd            f1, xh, xl      /* f1 = input operand, loaded from the xh/xl pair */
2509
2510         recip0.d        f0, f1
2511         const.d         f2, 2
2512         msub.d          f2, f1, f0
2513         mul.d           f3, f1, f0
2514         const.d         f4, 2
2515         mul.d           f5, f0, f2
2516         msub.d          f4, f3, f2
2517         const.d         f2, 1
2518         mul.d           f0, f5, f4
2519         msub.d          f2, f1, f0
2520         maddn.d         f0, f0, f2      /* last instruction of the sequence; leaves the result in f0 */
2521         /* Copy the result in f0 back to the integer register pair.  */
2522         rfr             xl, f0          /* xl receives one half of f0 (presumably the low word) */
2523         rfrd            xh, f0          /* xh receives the other half (presumably the high word) */
2524
2525         leaf_return
2526
2527 #endif /* L_recipdf2 */
2528 #endif /* XCHAL_HAVE_DFP_RECIP */
2529
2530 #if XCHAL_HAVE_DFP_RSQRT
2531 #ifdef L_rsqrtdf2
2532         /* Reciprocal square root: built only when XCHAL_HAVE_DFP_RSQRT is set; takes a double in xh/xl and returns one in xh/xl.  */
2533         /* NOTE(review): presumably a seed (rsqrt0.d) plus three similar refinement groups; semantics not visible here, confirm with the Xtensa ISA reference.  */
2534         .align  4
2535         .global __rsqrtdf2
2536         .type   __rsqrtdf2, @function
2537 __rsqrtdf2:
2538         leaf_entry      sp, 16
2539         /* NOTE(review): the const.d operand may be a constant-table index rather than a literal value; confirm before editing.  */
2540         wfrd            f1, xh, xl      /* f1 = input operand, loaded from the xh/xl pair */
2541
2542         rsqrt0.d        f0, f1
2543         mul.d           f2, f1, f0
2544         const.d         f3, 3
2545         mul.d           f4, f3, f0
2546         const.d         f5, 1
2547         msub.d          f5, f2, f0
2548         maddn.d         f0, f4, f5      /* end of the first refinement group; f0 is updated in place */
2549         const.d         f2, 1
2550         mul.d           f4, f1, f0
2551         mul.d           f5, f3, f0
2552         msub.d          f2, f4, f0
2553         maddn.d         f0, f5, f2      /* end of the second group */
2554         const.d         f2, 1
2555         mul.d           f1, f1, f0
2556         mul.d           f3, f3, f0
2557         msub.d          f2, f1, f0
2558         maddn.d         f0, f3, f2      /* end of the third group; leaves the result in f0 */
2559         /* Copy the result in f0 back to the integer register pair.  */
2560         rfr             xl, f0          /* xl receives one half of f0 (presumably the low word) */
2561         rfrd            xh, f0          /* xh receives the other half (presumably the high word) */
2562
2563         leaf_return
2564
2565 #endif /* L_rsqrtdf2 */
2566 #endif /* XCHAL_HAVE_DFP_RSQRT */