libgcc/config/xtensa/ieee754-df.S

   1 /* IEEE-754 double-precision functions for Xtensa
   2    Copyright (C) 2006-2015 Free Software Foundation, Inc.
   3    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but WITHOUT
  13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15    License for more details.
  16
  17    Under Section 7 of GPL version 3, you are granted additional
  18    permissions described in the GCC Runtime Library Exception, version
  19    3.1, as published by the Free Software Foundation.
  20
  21    You should have received a copy of the GNU General Public License and
  22    a copy of the GCC Runtime Library Exception along with this program;
  23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  24    <http://www.gnu.org/licenses/>.  */
  25
  26 #ifdef __XTENSA_EB__
  27 #define xh a2
  28 #define xl a3
  29 #define yh a4
  30 #define yl a5
  31 #else
  32 #define xh a3
  33 #define xl a2
  34 #define yh a5
  35 #define yl a4
  36 #endif
  37
  38 /*  Warning!  The branch displacements for some Xtensa branch instructions
  39     are quite small, and this code has been carefully laid out to keep
  40     branch targets in range.  If you change anything, be sure to check that
  41     the assembler is not relaxing anything to branch over a jump.  */
  42
  43 #ifdef L_negdf2
  44
  45         .align  4
  46         .global __negdf2
  47         .type   __negdf2, @function
  48 __negdf2:
  49         leaf_entry sp, 16
  50         movi    a4, 0x80000000
  51         xor     xh, xh, a4
  52         leaf_return
  53
  54 #endif /* L_negdf2 */
  55
  56 #ifdef L_addsubdf3
  57
  58         /* Addition */
  59 __adddf3_aux:
  60
  61         /* Handle NaNs and Infinities.  (This code is placed before the
  62            start of the function just to keep it in range of the limited
  63            branch displacements.)  */
  64
  65 .Ladd_xnan_or_inf:
  66         /* If y is neither Infinity nor NaN, return x.  */
  67         bnall   yh, a6, 1f
  68         /* If x is a NaN, return it.  Otherwise, return y.  */
  69         slli    a7, xh, 12
  70         or      a7, a7, xl
  71         beqz    a7, .Ladd_ynan_or_inf
  72 1:      leaf_return
  73
  74 .Ladd_ynan_or_inf:
  75         /* Return y.  */
  76         mov     xh, yh
  77         mov     xl, yl
  78         leaf_return
  79
  80 .Ladd_opposite_signs:
  81         /* Operand signs differ.  Do a subtraction.  */
  82         slli    a7, a6, 11
  83         xor     yh, yh, a7
  84         j       .Lsub_same_sign
  85
  86         .align  4
  87         .global __adddf3
  88         .type   __adddf3, @function
  89 __adddf3:
  90         leaf_entry sp, 16
  91         movi    a6, 0x7ff00000
  92
  93         /* Check if the two operands have the same sign.  */
  94         xor     a7, xh, yh
  95         bltz    a7, .Ladd_opposite_signs
  96
  97 .Ladd_same_sign:
  98         /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
  99         ball    xh, a6, .Ladd_xnan_or_inf
 100         ball    yh, a6, .Ladd_ynan_or_inf
 101
 102         /* Compare the exponents.  The smaller operand will be shifted
 103            right by the exponent difference and added to the larger
 104            one.  */
 105         extui   a7, xh, 20, 12
 106         extui   a8, yh, 20, 12
 107         bltu    a7, a8, .Ladd_shiftx
 108
 109 .Ladd_shifty:
 110         /* Check if the smaller (or equal) exponent is zero.  */
 111         bnone   yh, a6, .Ladd_yexpzero
 112
 113         /* Replace yh sign/exponent with 0x001.  */
 114         or      yh, yh, a6
 115         slli    yh, yh, 11
 116         srli    yh, yh, 11
 117
 118 .Ladd_yexpdiff:
 119         /* Compute the exponent difference.  Optimize for difference < 32.  */
 120         sub     a10, a7, a8
 121         bgeui   a10, 32, .Ladd_bigshifty
 122
 123         /* Shift yh/yl right by the exponent difference.  Any bits that are
 124            shifted out of yl are saved in a9 for rounding the result.  */
 125         ssr     a10
 126         movi    a9, 0
 127         src     a9, yl, a9
 128         src     yl, yh, yl
 129         srl     yh, yh
 130
 131 .Ladd_addy:
 132         /* Do the 64-bit addition.  */
 133         add     xl, xl, yl
 134         add     xh, xh, yh
 135         bgeu    xl, yl, 1f
 136         addi    xh, xh, 1
 137 1:
 138         /* Check if the add overflowed into the exponent.  */
 139         extui   a10, xh, 20, 12
 140         beq     a10, a7, .Ladd_round
 141         mov     a8, a7
 142         j       .Ladd_carry
 143
 144 .Ladd_yexpzero:
 145         /* y is a subnormal value.  Replace its sign/exponent with zero,
 146            i.e., no implicit "1.0", and increment the apparent exponent
 147            because subnormals behave as if they had the minimum (nonzero)
 148            exponent.  Test for the case when both exponents are zero.  */
 149         slli    yh, yh, 12
 150         srli    yh, yh, 12
 151         bnone   xh, a6, .Ladd_bothexpzero
 152         addi    a8, a8, 1
 153         j       .Ladd_yexpdiff
 154
 155 .Ladd_bothexpzero:
 156         /* Both exponents are zero.  Handle this as a special case.  There
 157            is no need to shift or round, and the normal code for handling
 158            a carry into the exponent field will not work because it
 159            assumes there is an implicit "1.0" that needs to be added.  */
 160         add     xl, xl, yl
 161         add     xh, xh, yh
 162         bgeu    xl, yl, 1f
 163         addi    xh, xh, 1
 164 1:      leaf_return
 165
 166 .Ladd_bigshifty:
 167         /* Exponent difference > 64 -- just return the bigger value.  */
 168         bgeui   a10, 64, 1b
 169
 170         /* Shift yh/yl right by the exponent difference.  Any bits that are
 171            shifted out are saved in a9 for rounding the result.  */
 172         ssr     a10
 173         sll     a11, yl         /* lost bits shifted out of yl */
 174         src     a9, yh, yl
 175         srl     yl, yh
 176         movi    yh, 0
 177         beqz    a11, .Ladd_addy
 178         or      a9, a9, a10     /* any positive, nonzero value will work */
 179         j       .Ladd_addy
 180
 181 .Ladd_xexpzero:
 182         /* Same as "yexpzero" except skip handling the case when both
 183            exponents are zero.  */
 184         slli    xh, xh, 12
 185         srli    xh, xh, 12
 186         addi    a7, a7, 1
 187         j       .Ladd_xexpdiff
 188
 189 .Ladd_shiftx:
 190         /* Same thing as the "shifty" code, but with x and y swapped.  Also,
 191            because the exponent difference is always nonzero in this version,
 192            the shift sequence can use SLL and skip loading a constant zero.  */
 193         bnone   xh, a6, .Ladd_xexpzero
 194
 195         or      xh, xh, a6
 196         slli    xh, xh, 11
 197         srli    xh, xh, 11
 198
 199 .Ladd_xexpdiff:
 200         sub     a10, a8, a7
 201         bgeui   a10, 32, .Ladd_bigshiftx
 202
 203         ssr     a10
 204         sll     a9, xl
 205         src     xl, xh, xl
 206         srl     xh, xh
 207
 208 .Ladd_addx:
 209         add     xl, xl, yl
 210         add     xh, xh, yh
 211         bgeu    xl, yl, 1f
 212         addi    xh, xh, 1
 213 1:
 214         /* Check if the add overflowed into the exponent.  */
 215         extui   a10, xh, 20, 12
 216         bne     a10, a8, .Ladd_carry
 217
 218 .Ladd_round:
 219         /* Round up if the leftover fraction is >= 1/2.  */
 220         bgez    a9, 1f
 221         addi    xl, xl, 1
 222         beqz    xl, .Ladd_roundcarry
 223
 224         /* Check if the leftover fraction is exactly 1/2.  */
 225         slli    a9, a9, 1
 226         beqz    a9, .Ladd_exactlyhalf
 227 1:      leaf_return
 228
 229 .Ladd_bigshiftx:
 230         /* Mostly the same thing as "bigshifty"....  */
 231         bgeui   a10, 64, .Ladd_returny
 232
 233         ssr     a10
 234         sll     a11, xl
 235         src     a9, xh, xl
 236         srl     xl, xh
 237         movi    xh, 0
 238         beqz    a11, .Ladd_addx
 239         or      a9, a9, a10
 240         j       .Ladd_addx
 241
 242 .Ladd_returny:
 243         mov     xh, yh
 244         mov     xl, yl
 245         leaf_return
 246
 247 .Ladd_carry:
 248         /* The addition has overflowed into the exponent field, so the
 249            value needs to be renormalized.  The mantissa of the result
 250            can be recovered by subtracting the original exponent and
 251            adding 0x100000 (which is the explicit "1.0" for the
 252            mantissa of the non-shifted operand -- the "1.0" for the
 253            shifted operand was already added).  The mantissa can then
 254            be shifted right by one bit.  The explicit "1.0" of the
 255            shifted mantissa then needs to be replaced by the exponent,
 256            incremented by one to account for the normalizing shift.
 257            It is faster to combine these operations: do the shift first
 258            and combine the additions and subtractions.  If x is the
 259            original exponent, the result is:
 260                shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
 261            or:
 262                shifted mantissa + ((x + 1) << 19)
 263            Note that the exponent is incremented here by leaving the
 264            explicit "1.0" of the mantissa in the exponent field.  */
 265
 266         /* Shift xh/xl right by one bit.  Save the lsb of xl.  */
 267         mov     a10, xl
 268         ssai    1
 269         src     xl, xh, xl
 270         srl     xh, xh
 271
 272         /* See explanation above.  The original exponent is in a8.  */
 273         addi    a8, a8, 1
 274         slli    a8, a8, 19
 275         add     xh, xh, a8
 276
 277         /* Return an Infinity if the exponent overflowed.  */
 278         ball    xh, a6, .Ladd_infinity
 279
 280         /* Same thing as the "round" code except the msb of the leftover
 281            fraction is bit 0 of a10, with the rest of the fraction in a9.  */
 282         bbci.l  a10, 0, 1f
 283         addi    xl, xl, 1
 284         beqz    xl, .Ladd_roundcarry
 285         beqz    a9, .Ladd_exactlyhalf
 286 1:      leaf_return
 287
 288 .Ladd_infinity:
 289         /* Clear the mantissa.  */
 290         movi    xl, 0
 291         srli    xh, xh, 20
 292         slli    xh, xh, 20
 293
 294         /* The sign bit may have been lost in a carry-out.  Put it back.  */
 295         slli    a8, a8, 1
 296         or      xh, xh, a8
 297         leaf_return
 298
 299 .Ladd_exactlyhalf:
 300         /* Round down to the nearest even value.  */
 301         srli    xl, xl, 1
 302         slli    xl, xl, 1
 303         leaf_return
 304
 305 .Ladd_roundcarry:
 306         /* xl is always zero when the rounding increment overflows, so
 307            there's no need to round it to an even value.  */
 308         addi    xh, xh, 1
 309         /* Overflow to the exponent is OK.  */
 310         leaf_return
 311
 312
 313         /* Subtraction */
 314 __subdf3_aux:
 315
 316         /* Handle NaNs and Infinities.  (This code is placed before the
 317            start of the function just to keep it in range of the limited
 318            branch displacements.)  */
 319
 320 .Lsub_xnan_or_inf:
 321         /* If y is neither Infinity nor NaN, return x.  */
 322         bnall   yh, a6, 1f
 323         /* Both x and y are either NaN or Inf, so the result is NaN.  */
 324         movi    a4, 0x80000     /* make it a quiet NaN */
 325         or      xh, xh, a4
 326 1:      leaf_return
 327
 328 .Lsub_ynan_or_inf:
 329         /* Negate y and return it.  */
 330         slli    a7, a6, 11
 331         xor     xh, yh, a7
 332         mov     xl, yl
 333         leaf_return
 334
 335 .Lsub_opposite_signs:
 336         /* Operand signs differ.  Do an addition.  */
 337         slli    a7, a6, 11
 338         xor     yh, yh, a7
 339         j       .Ladd_same_sign
 340
 341         .align  4
 342         .global __subdf3
 343         .type   __subdf3, @function
 344 __subdf3:
 345         leaf_entry sp, 16
 346         movi    a6, 0x7ff00000
 347
 348         /* Check if the two operands have the same sign.  */
 349         xor     a7, xh, yh
 350         bltz    a7, .Lsub_opposite_signs
 351
 352 .Lsub_same_sign:
 353         /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
 354         ball    xh, a6, .Lsub_xnan_or_inf
 355         ball    yh, a6, .Lsub_ynan_or_inf
 356
 357         /* Compare the operands.  In contrast to addition, the entire
 358            value matters here.  */
 359         extui   a7, xh, 20, 11
 360         extui   a8, yh, 20, 11
 361         bltu    xh, yh, .Lsub_xsmaller
 362         beq     xh, yh, .Lsub_compare_low
 363
 364 .Lsub_ysmaller:
 365         /* Check if the smaller (or equal) exponent is zero.  */
 366         bnone   yh, a6, .Lsub_yexpzero
 367
 368         /* Replace yh sign/exponent with 0x001.  */
 369         or      yh, yh, a6
 370         slli    yh, yh, 11
 371         srli    yh, yh, 11
 372
 373 .Lsub_yexpdiff:
 374         /* Compute the exponent difference.  Optimize for difference < 32.  */
 375         sub     a10, a7, a8
 376         bgeui   a10, 32, .Lsub_bigshifty
 377
 378         /* Shift yh/yl right by the exponent difference.  Any bits that are
 379            shifted out of yl are saved in a9 for rounding the result.  */
 380         ssr     a10
 381         movi    a9, 0
 382         src     a9, yl, a9
 383         src     yl, yh, yl
 384         srl     yh, yh
 385
 386 .Lsub_suby:
 387         /* Do the 64-bit subtraction.  */
 388         sub     xh, xh, yh
 389         bgeu    xl, yl, 1f
 390         addi    xh, xh, -1
 391 1:      sub     xl, xl, yl
 392
 393         /* Subtract the leftover bits in a9 from zero and propagate any
 394            borrow from xh/xl.  */
 395         neg     a9, a9
 396         beqz    a9, 1f
 397         addi    a5, xh, -1
 398         moveqz  xh, a5, xl
 399         addi    xl, xl, -1
 400 1:
 401         /* Check if the subtract underflowed into the exponent.  */
 402         extui   a10, xh, 20, 11
 403         beq     a10, a7, .Lsub_round
 404         j       .Lsub_borrow
 405
 406 .Lsub_compare_low:
 407         /* The high words are equal.  Compare the low words.  */
 408         bltu    xl, yl, .Lsub_xsmaller
 409         bltu    yl, xl, .Lsub_ysmaller
 410         /* The operands are equal.  Return 0.0.  */
 411         movi    xh, 0
 412         movi    xl, 0
 413 1:      leaf_return
 414
 415 .Lsub_yexpzero:
 416         /* y is a subnormal value.  Replace its sign/exponent with zero,
 417            i.e., no implicit "1.0".  Unless x is also a subnormal, increment
 418            y's apparent exponent because subnormals behave as if they had
 419            the minimum (nonzero) exponent.  */
 420         slli    yh, yh, 12
 421         srli    yh, yh, 12
 422         bnone   xh, a6, .Lsub_yexpdiff
 423         addi    a8, a8, 1
 424         j       .Lsub_yexpdiff
 425
 426 .Lsub_bigshifty:
 427         /* Exponent difference > 64 -- just return the bigger value.  */
 428         bgeui   a10, 64, 1b
 429
 430         /* Shift yh/yl right by the exponent difference.  Any bits that are
 431            shifted out are saved in a9 for rounding the result.  */
 432         ssr     a10
 433         sll     a11, yl         /* lost bits shifted out of yl */
 434         src     a9, yh, yl
 435         srl     yl, yh
 436         movi    yh, 0
 437         beqz    a11, .Lsub_suby
 438         or      a9, a9, a10     /* any positive, nonzero value will work */
 439         j       .Lsub_suby
 440
 441 .Lsub_xsmaller:
 442         /* Same thing as the "ysmaller" code, but with x and y swapped and
 443            with y negated.  */
 444         bnone   xh, a6, .Lsub_xexpzero
 445
 446         or      xh, xh, a6
 447         slli    xh, xh, 11
 448         srli    xh, xh, 11
 449
 450 .Lsub_xexpdiff:
 451         sub     a10, a8, a7
 452         bgeui   a10, 32, .Lsub_bigshiftx
 453
 454         ssr     a10
 455         movi    a9, 0
 456         src     a9, xl, a9
 457         src     xl, xh, xl
 458         srl     xh, xh
 459
 460         /* Negate y.  */
 461         slli    a11, a6, 11
 462         xor     yh, yh, a11
 463
 464 .Lsub_subx:
 465         sub     xl, yl, xl
 466         sub     xh, yh, xh
 467         bgeu    yl, xl, 1f
 468         addi    xh, xh, -1
 469 1:
 470         /* Subtract the leftover bits in a9 from zero and propagate any
 471            borrow from xh/xl.  */
 472         neg     a9, a9
 473         beqz    a9, 1f
 474         addi    a5, xh, -1
 475         moveqz  xh, a5, xl
 476         addi    xl, xl, -1
 477 1:
 478         /* Check if the subtract underflowed into the exponent.  */
 479         extui   a10, xh, 20, 11
 480         bne     a10, a8, .Lsub_borrow
 481
 482 .Lsub_round:
 483         /* Round up if the leftover fraction is >= 1/2.  */
 484         bgez    a9, 1f
 485         addi    xl, xl, 1
 486         beqz    xl, .Lsub_roundcarry
 487
 488         /* Check if the leftover fraction is exactly 1/2.  */
 489         slli    a9, a9, 1
 490         beqz    a9, .Lsub_exactlyhalf
 491 1:      leaf_return
 492
 493 .Lsub_xexpzero:
 494         /* Same as "yexpzero".  */
 495         slli    xh, xh, 12
 496         srli    xh, xh, 12
 497         bnone   yh, a6, .Lsub_xexpdiff
 498         addi    a7, a7, 1
 499         j       .Lsub_xexpdiff
 500
 501 .Lsub_bigshiftx:
 502         /* Mostly the same thing as "bigshifty", but with the sign bit of the
 503            shifted value set so that the subsequent subtraction flips the
 504            sign of y.  */
 505         bgeui   a10, 64, .Lsub_returny
 506
 507         ssr     a10
 508         sll     a11, xl
 509         src     a9, xh, xl
 510         srl     xl, xh
 511         slli    xh, a6, 11      /* set sign bit of xh */
 512         beqz    a11, .Lsub_subx
 513         or      a9, a9, a10
 514         j       .Lsub_subx
 515
 516 .Lsub_returny:
 517         /* Negate and return y.  */
 518         slli    a7, a6, 11
 519         xor     xh, yh, a7
 520         mov     xl, yl
 521         leaf_return
 522
 523 .Lsub_borrow:
 524         /* The subtraction has underflowed into the exponent field, so the
 525            value needs to be renormalized.  Shift the mantissa left as
 526            needed to remove any leading zeros and adjust the exponent
 527            accordingly.  If the exponent is not large enough to remove
 528            all the leading zeros, the result will be a subnormal value.  */
 529
 530         slli    a8, xh, 12
 531         beqz    a8, .Lsub_xhzero
 532         do_nsau a6, a8, a7, a11
 533         srli    a8, a8, 12
 534         bge     a6, a10, .Lsub_subnormal
 535         addi    a6, a6, 1
 536
 537 .Lsub_shift_lt32:
 538         /* Shift the mantissa (a8/xl/a9) left by a6.  */
 539         ssl     a6
 540         src     a8, a8, xl
 541         src     xl, xl, a9
 542         sll     a9, a9
 543
 544         /* Combine the shifted mantissa with the sign and exponent,
 545            decrementing the exponent by a6.  (The exponent has already
 546            been decremented by one due to the borrow from the subtraction,
 547            but adding the mantissa will increment the exponent by one.)  */
 548         srli    xh, xh, 20
 549         sub     xh, xh, a6
 550         slli    xh, xh, 20
 551         add     xh, xh, a8
 552         j       .Lsub_round
 553
 554 .Lsub_exactlyhalf:
 555         /* Round down to the nearest even value.  */
 556         srli    xl, xl, 1
 557         slli    xl, xl, 1
 558         leaf_return
 559
 560 .Lsub_roundcarry:
 561         /* xl is always zero when the rounding increment overflows, so
 562            there's no need to round it to an even value.  */
 563         addi    xh, xh, 1
 564         /* Overflow to the exponent is OK.  */
 565         leaf_return
 566
 567 .Lsub_xhzero:
 568         /* When normalizing the result, all the mantissa bits in the high
 569            word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
 570         do_nsau a6, xl, a7, a11
 571         addi    a6, a6, 21
 572         blt     a10, a6, .Lsub_subnormal
 573
 574 .Lsub_normalize_shift:
 575         bltui   a6, 32, .Lsub_shift_lt32
 576
 577         ssl     a6
 578         src     a8, xl, a9
 579         sll     xl, a9
 580         movi    a9, 0
 581
 582         srli    xh, xh, 20
 583         sub     xh, xh, a6
 584         slli    xh, xh, 20
 585         add     xh, xh, a8
 586         j       .Lsub_round
 587
 588 .Lsub_subnormal:
 589         /* The exponent is too small to shift away all the leading zeros.
 590            Set a6 to the current exponent (which has already been
 591            decremented by the borrow) so that the exponent of the result
 592            will be zero.  Do not add 1 to a6 in this case, because: (1)
 593            adding the mantissa will not increment the exponent, so there is
 594            no need to subtract anything extra from the exponent to
 595            compensate, and (2) the effective exponent of a subnormal is 1
 596            not 0 so the shift amount must be 1 smaller than normal. */
 597         mov     a6, a10
 598         j       .Lsub_normalize_shift
 599
 600 #endif /* L_addsubdf3 */
 601
 602 #ifdef L_muldf3
 603
 604         /* Multiplication */
 605 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
 606 #define XCHAL_NO_MUL 1
 607 #endif
 608
 609 __muldf3_aux:
 610
 611         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
 612            (This code is placed before the start of the function just to
 613            keep it in range of the limited branch displacements.)  */
 614
 615 .Lmul_xexpzero:
 616         /* Clear the sign bit of x.  */
 617         slli    xh, xh, 1
 618         srli    xh, xh, 1
 619
 620         /* If x is zero, return zero.  */
 621         or      a10, xh, xl
 622         beqz    a10, .Lmul_return_zero
 623
 624         /* Normalize x.  Adjust the exponent in a8.  */
 625         beqz    xh, .Lmul_xh_zero
 626         do_nsau a10, xh, a11, a12
 627         addi    a10, a10, -11
 628         ssl     a10
 629         src     xh, xh, xl
 630         sll     xl, xl
 631         movi    a8, 1
 632         sub     a8, a8, a10
 633         j       .Lmul_xnormalized
 634 .Lmul_xh_zero:
 635         do_nsau a10, xl, a11, a12
 636         addi    a10, a10, -11
 637         movi    a8, -31
 638         sub     a8, a8, a10
 639         ssl     a10
 640         bltz    a10, .Lmul_xl_srl
 641         sll     xh, xl
 642         movi    xl, 0
 643         j       .Lmul_xnormalized
 644 .Lmul_xl_srl:
 645         srl     xh, xl
 646         sll     xl, xl
 647         j       .Lmul_xnormalized
 648
 649 .Lmul_yexpzero:
 650         /* Clear the sign bit of y.  */
 651         slli    yh, yh, 1
 652         srli    yh, yh, 1
 653
 654         /* If y is zero, return zero.  */
 655         or      a10, yh, yl
 656         beqz    a10, .Lmul_return_zero
 657
 658         /* Normalize y.  Adjust the exponent in a9.  */
 659         beqz    yh, .Lmul_yh_zero
 660         do_nsau a10, yh, a11, a12
 661         addi    a10, a10, -11
 662         ssl     a10
 663         src     yh, yh, yl
 664         sll     yl, yl
 665         movi    a9, 1
 666         sub     a9, a9, a10
 667         j       .Lmul_ynormalized
 668 .Lmul_yh_zero:
 669         do_nsau a10, yl, a11, a12
 670         addi    a10, a10, -11
 671         movi    a9, -31
 672         sub     a9, a9, a10
 673         ssl     a10
 674         bltz    a10, .Lmul_yl_srl
 675         sll     yh, yl
 676         movi    yl, 0
 677         j       .Lmul_ynormalized
 678 .Lmul_yl_srl:
 679         srl     yh, yl
 680         sll     yl, yl
 681         j       .Lmul_ynormalized
 682
 683 .Lmul_return_zero:
 684         /* Return zero with the appropriate sign bit.  */
 685         srli    xh, a7, 31
 686         slli    xh, xh, 31
 687         movi    xl, 0
 688         j       .Lmul_done
 689
 690 .Lmul_xnan_or_inf:
 691         /* If y is zero, return NaN.  */
 692         bnez    yl, 1f
 693         slli    a8, yh, 1
 694         bnez    a8, 1f
 695         movi    a4, 0x80000     /* make it a quiet NaN */
 696         or      xh, xh, a4
 697         j       .Lmul_done
 698 1:
 699         /* If y is NaN, return y.  */
 700         bnall   yh, a6, .Lmul_returnx
 701         slli    a8, yh, 12
 702         or      a8, a8, yl
 703         beqz    a8, .Lmul_returnx
 704
 705 .Lmul_returny:
 706         mov     xh, yh
 707         mov     xl, yl
 708
 709 .Lmul_returnx:
 710         /* Set the sign bit and return.  */
 711         extui   a7, a7, 31, 1
 712         slli    xh, xh, 1
 713         ssai    1
 714         src     xh, a7, xh
 715         j       .Lmul_done
 716
 717 .Lmul_ynan_or_inf:
 718         /* If x is zero, return NaN.  */
 719         bnez    xl, .Lmul_returny
 720         slli    a8, xh, 1
 721         bnez    a8, .Lmul_returny
 722         movi    a7, 0x80000     /* make it a quiet NaN */
 723         or      xh, yh, a7
 724         j       .Lmul_done
 725
 726         .align  4
 727         .global __muldf3
 728         .type   __muldf3, @function
 729 __muldf3:
 730 #if __XTENSA_CALL0_ABI__
 731         leaf_entry sp, 32
 732         addi    sp, sp, -32
 733         s32i    a12, sp, 16
 734         s32i    a13, sp, 20
 735         s32i    a14, sp, 24
 736         s32i    a15, sp, 28
 737 #elif XCHAL_NO_MUL
 738         /* This is not really a leaf function; allocate enough stack space
 739            to allow CALL12s to a helper function.  */
 740         leaf_entry sp, 64
 741 #else
 742         leaf_entry sp, 32
 743 #endif
 744         movi    a6, 0x7ff00000
 745
 746         /* Get the sign of the result.  */
 747         xor     a7, xh, yh
 748
 749         /* Check for NaN and infinity.  */
 750         ball    xh, a6, .Lmul_xnan_or_inf
 751         ball    yh, a6, .Lmul_ynan_or_inf
 752
 753         /* Extract the exponents.  */
 754         extui   a8, xh, 20, 11
 755         extui   a9, yh, 20, 11
 756
 757         beqz    a8, .Lmul_xexpzero
 758 .Lmul_xnormalized:
 759         beqz    a9, .Lmul_yexpzero
 760 .Lmul_ynormalized:
 761
 762         /* Add the exponents.  */
 763         add     a8, a8, a9
 764
 765         /* Replace sign/exponent fields with explicit "1.0".  */
 766         movi    a10, 0x1fffff
 767         or      xh, xh, a6
 768         and     xh, xh, a10
 769         or      yh, yh, a6
 770         and     yh, yh, a10
 771
 772         /* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
 773            The least-significant word of the result is thrown away except
 774            that if it is nonzero, the lsb of a6 is set to 1.  */
 775 #if XCHAL_HAVE_MUL32_HIGH
 776
 777         /* Compute a6 with any carry-outs in a10.  */
 778         movi    a10, 0
 779         mull    a6, xl, yh
 780         mull    a11, xh, yl
 781         add     a6, a6, a11
 782         bgeu    a6, a11, 1f
 783         addi    a10, a10, 1
 784 1:
 785         muluh   a11, xl, yl
 786         add     a6, a6, a11
 787         bgeu    a6, a11, 1f
 788         addi    a10, a10, 1
 789 1:
 790         /* If the low word of the result is nonzero, set the lsb of a6.  */
 791         mull    a11, xl, yl
 792         beqz    a11, 1f
 793         movi    a9, 1
 794         or      a6, a6, a9
 795 1:
 796         /* Compute xl with any carry-outs in a9.  */
 797         movi    a9, 0
 798         mull    a11, xh, yh
 799         add     a10, a10, a11
 800         bgeu    a10, a11, 1f
 801         addi    a9, a9, 1
 802 1:
 803         muluh   a11, xh, yl
 804         add     a10, a10, a11
 805         bgeu    a10, a11, 1f
 806         addi    a9, a9, 1
 807 1:
 808         muluh   xl, xl, yh
 809         add     xl, xl, a10
 810         bgeu    xl, a10, 1f
 811         addi    a9, a9, 1
 812 1:
 813         /* Compute xh.  */
 814         muluh   xh, xh, yh
 815         add     xh, xh, a9
 816
 817 #else /* ! XCHAL_HAVE_MUL32_HIGH */
 818
 819         /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
 820            products.  These partial products are:
 821
 822                 0 xll * yll
 823
 824                 1 xll * ylh
 825                 2 xlh * yll
 826
 827                 3 xll * yhl
 828                 4 xlh * ylh
 829                 5 xhl * yll
 830
 831                 6 xll * yhh
 832                 7 xlh * yhl
 833                 8 xhl * ylh
 834                 9 xhh * yll
 835
 836                 10 xlh * yhh
 837                 11 xhl * yhl
 838                 12 xhh * ylh
 839
 840                 13 xhl * yhh
 841                 14 xhh * yhl
 842
 843                 15 xhh * yhh
 844
 845            where the input chunks are (hh, hl, lh, ll).  If using the Mul16
 846            or Mul32 multiplier options, these input chunks must be stored in
 847            separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
 848            that the inputs come from either half of the registers, so there
 849            is no need to shift them out ahead of time.  If there is no
 850            multiply hardware, the 16-bit chunks can be extracted when setting
 851            up the arguments to the separate multiply function.  */
 852
 853         /* Save a7 since it is needed to hold a temporary value.  */
 854         s32i    a7, sp, 4
 855 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
 856         /* Calling a separate multiply function will clobber a0 and requires
 857            use of a8 as a temporary, so save those values now.  (The function
 858            uses a custom ABI so nothing else needs to be saved.)  */
 859         s32i    a0, sp, 0
 860         s32i    a8, sp, 8
 861 #endif
 862
 863 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
 864
 865 #define xlh a12
 866 #define ylh a13
 867 #define xhh a14
 868 #define yhh a15
 869
 870         /* Get the high halves of the inputs into registers.  */
 871         srli    xlh, xl, 16
 872         srli    ylh, yl, 16
 873         srli    xhh, xh, 16
 874         srli    yhh, yh, 16
 875
 876 #define xll xl
 877 #define yll yl
 878 #define xhl xh
 879 #define yhl yh
 880
 881 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
 882         /* Clear the high halves of the inputs.  This does not matter
 883            for MUL16 because the high bits are ignored.  */
 884         extui   xl, xl, 0, 16
 885         extui   xh, xh, 0, 16
 886         extui   yl, yl, 0, 16
 887         extui   yh, yh, 0, 16
 888 #endif
 889 #endif /* MUL16 || MUL32 */
 890
 891
 892 #if XCHAL_HAVE_MUL16
 893
 894 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 895         mul16u  dst, xreg ## xhalf, yreg ## yhalf
 896
 897 #elif XCHAL_HAVE_MUL32
 898
 899 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 900         mull    dst, xreg ## xhalf, yreg ## yhalf
 901
 902 #elif XCHAL_HAVE_MAC16
 903
 904 /* The preprocessor insists on inserting a space when concatenating after
 905    a period in the definition of do_mul below.  These macros are a workaround
 906    using underscores instead of periods when doing the concatenation.  */
 907 #define umul_aa_ll umul.aa.ll
 908 #define umul_aa_lh umul.aa.lh
 909 #define umul_aa_hl umul.aa.hl
 910 #define umul_aa_hh umul.aa.hh
 911
 912 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 913         umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
 914         rsr     dst, ACCLO
 915
 916 #else /* no multiply hardware */
 917
 918 #define set_arg_l(dst, src) \
 919         extui   dst, src, 0, 16
 920 #define set_arg_h(dst, src) \
 921         srli    dst, src, 16
 922
 923 #if __XTENSA_CALL0_ABI__
 924 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 925         set_arg_ ## xhalf (a13, xreg); \
 926         set_arg_ ## yhalf (a14, yreg); \
 927         call0   .Lmul_mulsi3; \
 928         mov     dst, a12
 929 #else
 930 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 931         set_arg_ ## xhalf (a14, xreg); \
 932         set_arg_ ## yhalf (a15, yreg); \
 933         call12  .Lmul_mulsi3; \
 934         mov     dst, a14
 935 #endif /* __XTENSA_CALL0_ABI__ */
 936
 937 #endif /* no multiply hardware */
 938
 939         /* Add pp1 and pp2 into a10 with carry-out in a9.  */
 940         do_mul(a10, xl, l, yl, h)       /* pp 1 */
 941         do_mul(a11, xl, h, yl, l)       /* pp 2 */
 942         movi    a9, 0
 943         add     a10, a10, a11
 944         bgeu    a10, a11, 1f
 945         addi    a9, a9, 1
 946 1:
 947         /* Initialize a6 with a9/a10 shifted into position.  Note that
 948            this value can be safely incremented without any carry-outs.  */
 949         ssai    16
 950         src     a6, a9, a10
 951
 952         /* Compute the low word into a10.  */
 953         do_mul(a11, xl, l, yl, l)       /* pp 0 */
 954         sll     a10, a10
 955         add     a10, a10, a11
 956         bgeu    a10, a11, 1f
 957         addi    a6, a6, 1
 958 1:
 959         /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
 960            This is good enough to determine the low half of a6, so that any
 961            nonzero bits from the low word of the result can be collapsed
 962            into a6, freeing up a register.  */
 963         movi    a9, 0
 964         do_mul(a11, xl, l, yh, l)       /* pp 3 */
 965         add     a6, a6, a11
 966         bgeu    a6, a11, 1f
 967         addi    a9, a9, 1
 968 1:
 969         do_mul(a11, xl, h, yl, h)       /* pp 4 */
 970         add     a6, a6, a11
 971         bgeu    a6, a11, 1f
 972         addi    a9, a9, 1
 973 1:
 974         do_mul(a11, xh, l, yl, l)       /* pp 5 */
 975         add     a6, a6, a11
 976         bgeu    a6, a11, 1f
 977         addi    a9, a9, 1
 978 1:
 979         /* Collapse any nonzero bits from the low word into a6.  */
 980         beqz    a10, 1f
 981         movi    a11, 1
 982         or      a6, a6, a11
 983 1:
 984         /* Add pp6-9 into a11 with carry-outs in a10.  */
 985         do_mul(a7, xl, l, yh, h)        /* pp 6 */
 986         do_mul(a11, xh, h, yl, l)       /* pp 9 */
 987         movi    a10, 0
 988         add     a11, a11, a7
 989         bgeu    a11, a7, 1f
 990         addi    a10, a10, 1
 991 1:
 992         do_mul(a7, xl, h, yh, l)        /* pp 7 */
 993         add     a11, a11, a7
 994         bgeu    a11, a7, 1f
 995         addi    a10, a10, 1
 996 1:
 997         do_mul(a7, xh, l, yl, h)        /* pp 8 */
 998         add     a11, a11, a7
 999         bgeu    a11, a7, 1f
1000         addi    a10, a10, 1
1001 1:
1002         /* Shift a10/a11 into position, and add low half of a11 to a6.  */
1003         src     a10, a10, a11
1004         add     a10, a10, a9
1005         sll     a11, a11
1006         add     a6, a6, a11
1007         bgeu    a6, a11, 1f
1008         addi    a10, a10, 1
1009 1:
1010         /* Add pp10-12 into xl with carry-outs in a9.  */
1011         movi    a9, 0
1012         do_mul(xl, xl, h, yh, h)        /* pp 10 */
1013         add     xl, xl, a10
1014         bgeu    xl, a10, 1f
1015         addi    a9, a9, 1
1016 1:
1017         do_mul(a10, xh, l, yh, l)       /* pp 11 */
1018         add     xl, xl, a10
1019         bgeu    xl, a10, 1f
1020         addi    a9, a9, 1
1021 1:
1022         do_mul(a10, xh, h, yl, h)       /* pp 12 */
1023         add     xl, xl, a10
1024         bgeu    xl, a10, 1f
1025         addi    a9, a9, 1
1026 1:
1027         /* Add pp13-14 into a11 with carry-outs in a10.  */
1028         do_mul(a11, xh, l, yh, h)       /* pp 13 */
1029         do_mul(a7, xh, h, yh, l)        /* pp 14 */
1030         movi    a10, 0
1031         add     a11, a11, a7
1032         bgeu    a11, a7, 1f
1033         addi    a10, a10, 1
1034 1:
1035         /* Shift a10/a11 into position, and add low half of a11 to a6.  */
1036         src     a10, a10, a11
1037         add     a10, a10, a9
1038         sll     a11, a11
1039         add     xl, xl, a11
1040         bgeu    xl, a11, 1f
1041         addi    a10, a10, 1
1042 1:
1043         /* Compute xh.  */
1044         do_mul(xh, xh, h, yh, h)        /* pp 15 */
1045         add     xh, xh, a10
1046
1047         /* Restore values saved on the stack during the multiplication.  */
1048         l32i    a7, sp, 4
1049 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
1050         l32i    a0, sp, 0
1051         l32i    a8, sp, 8
1052 #endif
1053 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
1054
1055         /* Shift left by 12 bits, unless there was a carry-out from the
1056            multiply, in which case, shift by 11 bits and increment the
1057            exponent.  Note: It is convenient to use the constant 0x3ff
1058            instead of 0x400 when removing the extra exponent bias (so that
1059            it is easy to construct 0x7fe for the overflow check).  Reverse
1060            the logic here to decrement the exponent sum by one unless there
1061            was a carry-out.  */
1062         movi    a4, 11
1063         srli    a5, xh, 21 - 12
1064         bnez    a5, 1f
1065         addi    a4, a4, 1
1066         addi    a8, a8, -1
1067 1:      ssl     a4
1068         src     xh, xh, xl
1069         src     xl, xl, a6
1070         sll     a6, a6
1071
1072         /* Subtract the extra bias from the exponent sum (plus one to account
1073            for the explicit "1.0" of the mantissa that will be added to the
1074            exponent in the final result).  */
1075         movi    a4, 0x3ff
1076         sub     a8, a8, a4
1077
1078         /* Check for over/underflow.  The value in a8 is one less than the
1079            final exponent, so values in the range 0..7fd are OK here.  */
1080         slli    a4, a4, 1       /* 0x7fe */
1081         bgeu    a8, a4, .Lmul_overflow
1082
1083 .Lmul_round:
1084         /* Round.  */
1085         bgez    a6, .Lmul_rounded
1086         addi    xl, xl, 1
1087         beqz    xl, .Lmul_roundcarry
1088         slli    a6, a6, 1
1089         beqz    a6, .Lmul_exactlyhalf
1090
1091 .Lmul_rounded:
1092         /* Add the exponent to the mantissa.  */
1093         slli    a8, a8, 20
1094         add     xh, xh, a8
1095
1096 .Lmul_addsign:
1097         /* Add the sign bit.  */
1098         srli    a7, a7, 31
1099         slli    a7, a7, 31
1100         or      xh, xh, a7
1101
1102 .Lmul_done:
1103 #if __XTENSA_CALL0_ABI__
1104         l32i    a12, sp, 16
1105         l32i    a13, sp, 20
1106         l32i    a14, sp, 24
1107         l32i    a15, sp, 28
1108         addi    sp, sp, 32
1109 #endif
1110         leaf_return
1111
1112 .Lmul_exactlyhalf:
1113         /* Round down to the nearest even value.  */
1114         srli    xl, xl, 1
1115         slli    xl, xl, 1
1116         j       .Lmul_rounded
1117
1118 .Lmul_roundcarry:
1119         /* xl is always zero when the rounding increment overflows, so
1120            there's no need to round it to an even value.  */
1121         addi    xh, xh, 1
1122         /* Overflow is OK -- it will be added to the exponent.  */
1123         j       .Lmul_rounded
1124
1125 .Lmul_overflow:
1126         bltz    a8, .Lmul_underflow
1127         /* Return +/- Infinity.  */
1128         addi    a8, a4, 1       /* 0x7ff */
1129         slli    xh, a8, 20
1130         movi    xl, 0
1131         j       .Lmul_addsign
1132
1133 .Lmul_underflow:
1134         /* Create a subnormal value, where the exponent field contains zero,
1135            but the effective exponent is 1.  The value of a8 is one less than
1136            the actual exponent, so just negate it to get the shift amount.  */
1137         neg     a8, a8
1138         mov     a9, a6
1139         ssr     a8
1140         bgeui   a8, 32, .Lmul_bigshift
1141
1142         /* Shift xh/xl right.  Any bits that are shifted out of xl are saved
1143            in a6 (combined with the shifted-out bits currently in a6) for
1144            rounding the result.  */
1145         sll     a6, xl
1146         src     xl, xh, xl
1147         srl     xh, xh
1148         j       1f
1149
1150 .Lmul_bigshift:
1151         bgeui   a8, 64, .Lmul_flush_to_zero
1152         sll     a10, xl         /* lost bits shifted out of xl */
1153         src     a6, xh, xl
1154         srl     xl, xh
1155         movi    xh, 0
1156         or      a9, a9, a10
1157
1158         /* Set the exponent to zero.  */
1159 1:      movi    a8, 0
1160
1161         /* Pack any nonzero bits shifted out into a6.  */
1162         beqz    a9, .Lmul_round
1163         movi    a9, 1
1164         or      a6, a6, a9
1165         j       .Lmul_round
1166
1167 .Lmul_flush_to_zero:
1168         /* Return zero with the appropriate sign bit.  */
1169         srli    xh, a7, 31
1170         slli    xh, xh, 31
1171         movi    xl, 0
1172         j       .Lmul_done
1173
1174 #if XCHAL_NO_MUL
1175
1176         /* For Xtensa processors with no multiply hardware, this simplified
1177            version of _mulsi3 is used for multiplying 16-bit chunks of
1178            the floating-point mantissas.  When using CALL0, this function
1179            uses a custom ABI: the inputs are passed in a13 and a14, the
1180            result is returned in a12, and a8 and a15 are clobbered.  */
1181         .align  4
1182 .Lmul_mulsi3:
1183         leaf_entry sp, 16
1184         .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
1185         movi    \dst, 0
1186 1:      add     \tmp1, \src2, \dst
1187         extui   \tmp2, \src1, 0, 1
1188         movnez  \dst, \tmp1, \tmp2
1189
1190         do_addx2 \tmp1, \src2, \dst, \tmp1
1191         extui   \tmp2, \src1, 1, 1
1192         movnez  \dst, \tmp1, \tmp2
1193
1194         do_addx4 \tmp1, \src2, \dst, \tmp1
1195         extui   \tmp2, \src1, 2, 1
1196         movnez  \dst, \tmp1, \tmp2
1197
1198         do_addx8 \tmp1, \src2, \dst, \tmp1
1199         extui   \tmp2, \src1, 3, 1
1200         movnez  \dst, \tmp1, \tmp2
1201
1202         srli    \src1, \src1, 4
1203         slli    \src2, \src2, 4
1204         bnez    \src1, 1b
1205         .endm
1206 #if __XTENSA_CALL0_ABI__
1207         mul_mulsi3_body a12, a13, a14, a15, a8
1208 #else
1209         /* The result will be written into a2, so save that argument in a4.  */
1210         mov     a4, a2
1211         mul_mulsi3_body a2, a4, a3, a5, a6
1212 #endif
1213         leaf_return
1214 #endif /* XCHAL_NO_MUL */
1215 #endif /* L_muldf3 */
1216
1217 #ifdef L_divdf3
1218
1219         /* Division */
1220 __divdf3_aux:
1221
1222         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
1223            (This code is placed before the start of the function just to
1224            keep it in range of the limited branch displacements.)  */
1225
1226 .Ldiv_yexpzero:
1227         /* Clear the sign bit of y.  */
1228         slli    yh, yh, 1
1229         srli    yh, yh, 1
1230
1231         /* Check for division by zero.  */
1232         or      a10, yh, yl
1233         beqz    a10, .Ldiv_yzero
1234
1235         /* Normalize y.  Adjust the exponent in a9.  */
1236         beqz    yh, .Ldiv_yh_zero
1237         do_nsau a10, yh, a11, a9
1238         addi    a10, a10, -11
1239         ssl     a10
1240         src     yh, yh, yl
1241         sll     yl, yl
1242         movi    a9, 1
1243         sub     a9, a9, a10
1244         j       .Ldiv_ynormalized
1245 .Ldiv_yh_zero:
1246         do_nsau a10, yl, a11, a9
1247         addi    a10, a10, -11
1248         movi    a9, -31
1249         sub     a9, a9, a10
1250         ssl     a10
1251         bltz    a10, .Ldiv_yl_srl
1252         sll     yh, yl
1253         movi    yl, 0
1254         j       .Ldiv_ynormalized
1255 .Ldiv_yl_srl:
1256         srl     yh, yl
1257         sll     yl, yl
1258         j       .Ldiv_ynormalized
1259
1260 .Ldiv_yzero:
1261         /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
1262         slli    xh, xh, 1
1263         srli    xh, xh, 1
1264         or      xl, xl, xh
1265         srli    xh, a7, 31
1266         slli    xh, xh, 31
1267         or      xh, xh, a6
1268         bnez    xl, 1f
1269         movi    a4, 0x80000     /* make it a quiet NaN */
1270         or      xh, xh, a4
1271 1:      movi    xl, 0
1272         leaf_return
1273
1274 .Ldiv_xexpzero:
1275         /* Clear the sign bit of x.  */
1276         slli    xh, xh, 1
1277         srli    xh, xh, 1
1278
1279         /* If x is zero, return zero.  */
1280         or      a10, xh, xl
1281         beqz    a10, .Ldiv_return_zero
1282
1283         /* Normalize x.  Adjust the exponent in a8.  */
1284         beqz    xh, .Ldiv_xh_zero
1285         do_nsau a10, xh, a11, a8
1286         addi    a10, a10, -11
1287         ssl     a10
1288         src     xh, xh, xl
1289         sll     xl, xl
1290         movi    a8, 1
1291         sub     a8, a8, a10
1292         j       .Ldiv_xnormalized
1293 .Ldiv_xh_zero:
1294         do_nsau a10, xl, a11, a8
1295         addi    a10, a10, -11
1296         movi    a8, -31
1297         sub     a8, a8, a10
1298         ssl     a10
1299         bltz    a10, .Ldiv_xl_srl
1300         sll     xh, xl
1301         movi    xl, 0
1302         j       .Ldiv_xnormalized
1303 .Ldiv_xl_srl:
1304         srl     xh, xl
1305         sll     xl, xl
1306         j       .Ldiv_xnormalized
1307
1308 .Ldiv_return_zero:
1309         /* Return zero with the appropriate sign bit.  */
1310         srli    xh, a7, 31
1311         slli    xh, xh, 31
1312         movi    xl, 0
1313         leaf_return
1314
1315 .Ldiv_xnan_or_inf:
1316         /* Set the sign bit of the result.  */
1317         srli    a7, yh, 31
1318         slli    a7, a7, 31
1319         xor     xh, xh, a7
1320         /* If y is NaN or Inf, return NaN.  */
1321         bnall   yh, a6, 1f
1322         movi    a4, 0x80000     /* make it a quiet NaN */
1323         or      xh, xh, a4
1324 1:      leaf_return
1325
1326 .Ldiv_ynan_or_inf:
1327         /* If y is Infinity, return zero.  */
1328         slli    a8, yh, 12
1329         or      a8, a8, yl
1330         beqz    a8, .Ldiv_return_zero
1331         /* y is NaN; return it.  */
1332         mov     xh, yh
1333         mov     xl, yl
1334         leaf_return
1335
1336 .Ldiv_highequal1:
1337         bltu    xl, yl, 2f
1338         j       3f
1339
1340         .align  4
1341         .global __divdf3
1342         .type   __divdf3, @function
1343 __divdf3:
1344         leaf_entry sp, 16
1345         movi    a6, 0x7ff00000
1346
1347         /* Get the sign of the result.  */
1348         xor     a7, xh, yh
1349
1350         /* Check for NaN and infinity.  */
1351         ball    xh, a6, .Ldiv_xnan_or_inf
1352         ball    yh, a6, .Ldiv_ynan_or_inf
1353
1354         /* Extract the exponents.  */
1355         extui   a8, xh, 20, 11
1356         extui   a9, yh, 20, 11
1357
1358         beqz    a9, .Ldiv_yexpzero
1359 .Ldiv_ynormalized:
1360         beqz    a8, .Ldiv_xexpzero
1361 .Ldiv_xnormalized:
1362
1363         /* Subtract the exponents.  */
1364         sub     a8, a8, a9
1365
1366         /* Replace sign/exponent fields with explicit "1.0".  */
1367         movi    a10, 0x1fffff
1368         or      xh, xh, a6
1369         and     xh, xh, a10
1370         or      yh, yh, a6
1371         and     yh, yh, a10
1372
1373         /* Set SAR for left shift by one.  */
1374         ssai    (32 - 1)
1375
1376         /* The first digit of the mantissa division must be a one.
1377            Shift x (and adjust the exponent) as needed to make this true.  */
1378         bltu    yh, xh, 3f
1379         beq     yh, xh, .Ldiv_highequal1
1380 2:      src     xh, xh, xl
1381         sll     xl, xl
1382         addi    a8, a8, -1
1383 3:
1384         /* Do the first subtraction and shift.  */
1385         sub     xh, xh, yh
1386         bgeu    xl, yl, 1f
1387         addi    xh, xh, -1
1388 1:      sub     xl, xl, yl
1389         src     xh, xh, xl
1390         sll     xl, xl
1391
1392         /* Put the quotient into a10/a11.  */
1393         movi    a10, 0
1394         movi    a11, 1
1395
1396         /* Divide one bit at a time for 52 bits.  */
1397         movi    a9, 52
1398 #if XCHAL_HAVE_LOOPS
1399         loop    a9, .Ldiv_loopend
1400 #endif
1401 .Ldiv_loop:
1402         /* Shift the quotient << 1.  */
1403         src     a10, a10, a11
1404         sll     a11, a11
1405
1406         /* Is this digit a 0 or 1?  */
1407         bltu    xh, yh, 3f
1408         beq     xh, yh, .Ldiv_highequal2
1409
1410         /* Output a 1 and subtract.  */
1411 2:      addi    a11, a11, 1
1412         sub     xh, xh, yh
1413         bgeu    xl, yl, 1f
1414         addi    xh, xh, -1
1415 1:      sub     xl, xl, yl
1416
1417         /* Shift the dividend << 1.  */
1418 3:      src     xh, xh, xl
1419         sll     xl, xl
1420
1421 #if !XCHAL_HAVE_LOOPS
1422         addi    a9, a9, -1
1423         bnez    a9, .Ldiv_loop
1424 #endif
1425 .Ldiv_loopend:
1426
1427         /* Add the exponent bias (less one to account for the explicit "1.0"
1428            of the mantissa that will be added to the exponent in the final
1429            result).  */
1430         movi    a9, 0x3fe
1431         add     a8, a8, a9
1432
1433         /* Check for over/underflow.  The value in a8 is one less than the
1434            final exponent, so values in the range 0..7fd are OK here.  */
1435         addmi   a9, a9, 0x400   /* 0x7fe */
1436         bgeu    a8, a9, .Ldiv_overflow
1437
1438 .Ldiv_round:
1439         /* Round.  The remainder (<< 1) is in xh/xl.  */
1440         bltu    xh, yh, .Ldiv_rounded
1441         beq     xh, yh, .Ldiv_highequal3
1442 .Ldiv_roundup:
1443         addi    a11, a11, 1
1444         beqz    a11, .Ldiv_roundcarry
1445
1446 .Ldiv_rounded:
1447         mov     xl, a11
1448         /* Add the exponent to the mantissa.  */
1449         slli    a8, a8, 20
1450         add     xh, a10, a8
1451
1452 .Ldiv_addsign:
1453         /* Add the sign bit.  */
1454         srli    a7, a7, 31
1455         slli    a7, a7, 31
1456         or      xh, xh, a7
1457         leaf_return
1458
1459 .Ldiv_highequal2:
1460         bgeu    xl, yl, 2b
1461         j       3b
1462
1463 .Ldiv_highequal3:
1464         bltu    xl, yl, .Ldiv_rounded
1465         bne     xl, yl, .Ldiv_roundup
1466
1467         /* Remainder is exactly half the divisor.  Round even.  */
1468         addi    a11, a11, 1
1469         beqz    a11, .Ldiv_roundcarry
1470         srli    a11, a11, 1
1471         slli    a11, a11, 1
1472         j       .Ldiv_rounded
1473
1474 .Ldiv_overflow:
1475         bltz    a8, .Ldiv_underflow
1476         /* Return +/- Infinity.  */
1477         addi    a8, a9, 1       /* 0x7ff */
1478         slli    xh, a8, 20
1479         movi    xl, 0
1480         j       .Ldiv_addsign
1481
1482 .Ldiv_underflow:
1483         /* Create a subnormal value, where the exponent field contains zero,
1484            but the effective exponent is 1.  The value of a8 is one less than
1485            the actual exponent, so just negate it to get the shift amount.  */
1486         neg     a8, a8
1487         ssr     a8
1488         bgeui   a8, 32, .Ldiv_bigshift
1489
1490         /* Shift a10/a11 right.  Any bits that are shifted out of a11 are
1491            saved in a6 for rounding the result.  */
1492         sll     a6, a11
1493         src     a11, a10, a11
1494         srl     a10, a10
1495         j       1f
1496
1497 .Ldiv_bigshift:
1498         bgeui   a8, 64, .Ldiv_flush_to_zero
1499         sll     a9, a11         /* lost bits shifted out of a11 */
1500         src     a6, a10, a11
1501         srl     a11, a10
1502         movi    a10, 0
1503         or      xl, xl, a9
1504
1505         /* Set the exponent to zero.  */
1506 1:      movi    a8, 0
1507
1508         /* Pack any nonzero remainder (in xh/xl) into a6.  */
1509         or      xh, xh, xl
1510         beqz    xh, 1f
1511         movi    a9, 1
1512         or      a6, a6, a9
1513
1514         /* Round a10/a11 based on the bits shifted out into a6.  */
1515 1:      bgez    a6, .Ldiv_rounded
1516         addi    a11, a11, 1
1517         beqz    a11, .Ldiv_roundcarry
1518         slli    a6, a6, 1
1519         bnez    a6, .Ldiv_rounded
1520         srli    a11, a11, 1
1521         slli    a11, a11, 1
1522         j       .Ldiv_rounded
1523
1524 .Ldiv_roundcarry:
1525         /* a11 is always zero when the rounding increment overflows, so
1526            there's no need to round it to an even value.  */
1527         addi    a10, a10, 1
1528         /* Overflow to the exponent field is OK.  */
1529         j       .Ldiv_rounded
1530
1531 .Ldiv_flush_to_zero:
1532         /* Return zero with the appropriate sign bit.  */
1533         srli    xh, a7, 31
1534         slli    xh, xh, 31
1535         movi    xl, 0
1536         leaf_return
1537
1538 #endif /* L_divdf3 */
1539
1540 #ifdef L_cmpdf2
1541
1542         /* Equal and Not Equal */
1543
1544         .align  4
1545         .global __eqdf2
1546         .global __nedf2
1547         .set    __nedf2, __eqdf2
1548         .type   __eqdf2, @function
1549 __eqdf2:
1550         leaf_entry sp, 16
1551         bne     xl, yl, 2f
1552         bne     xh, yh, 4f
1553
1554         /* The values are equal but NaN != NaN.  Check the exponent.  */
1555         movi    a6, 0x7ff00000
1556         ball    xh, a6, 3f
1557
1558         /* Equal.  */
1559         movi    a2, 0
1560         leaf_return
1561
1562         /* Not equal.  */
1563 2:      movi    a2, 1
1564         leaf_return
1565
1566         /* Check if the mantissas are nonzero.  */
1567 3:      slli    a7, xh, 12
1568         or      a7, a7, xl
1569         j       5f
1570
1571         /* Check if x and y are zero with different signs.  */
1572 4:      or      a7, xh, yh
1573         slli    a7, a7, 1
1574         or      a7, a7, xl      /* xl == yl here */
1575
1576         /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1577            or x when exponent(x) = 0x7ff and x == y.  */
1578 5:      movi    a2, 0
1579         movi    a3, 1
1580         movnez  a2, a3, a7
1581         leaf_return
1582
1583
1584         /* Greater Than */
1585
1586         .align  4
1587         .global __gtdf2
1588         .type   __gtdf2, @function
1589 __gtdf2:
1590         leaf_entry sp, 16
1591         movi    a6, 0x7ff00000
1592         ball    xh, a6, 2f
1593 1:      bnall   yh, a6, .Lle_cmp
1594
1595         /* Check if y is a NaN.  */
1596         slli    a7, yh, 12
1597         or      a7, a7, yl
1598         beqz    a7, .Lle_cmp
1599         movi    a2, 0
1600         leaf_return
1601
1602         /* Check if x is a NaN.  */
1603 2:      slli    a7, xh, 12
1604         or      a7, a7, xl
1605         beqz    a7, 1b
1606         movi    a2, 0
1607         leaf_return
1608
1609
1610         /* Less Than or Equal */
1611
1612         .align  4
1613         .global __ledf2
1614         .type   __ledf2, @function
1615 __ledf2:
1616         leaf_entry sp, 16
1617         movi    a6, 0x7ff00000
1618         ball    xh, a6, 2f
1619 1:      bnall   yh, a6, .Lle_cmp
1620
1621         /* Check if y is a NaN.  */
1622         slli    a7, yh, 12
1623         or      a7, a7, yl
1624         beqz    a7, .Lle_cmp
1625         movi    a2, 1
1626         leaf_return
1627
1628         /* Check if x is a NaN.  */
1629 2:      slli    a7, xh, 12
1630         or      a7, a7, xl
1631         beqz    a7, 1b
1632         movi    a2, 1
1633         leaf_return
1634
1635 .Lle_cmp:
1636         /* Check if x and y have different signs.  */
1637         xor     a7, xh, yh
1638         bltz    a7, .Lle_diff_signs
1639
1640         /* Check if x is negative.  */
1641         bltz    xh, .Lle_xneg
1642
1643         /* Check if x <= y.  */
1644         bltu    xh, yh, 4f
1645         bne     xh, yh, 5f
1646         bltu    yl, xl, 5f
1647 4:      movi    a2, 0
1648         leaf_return
1649
1650 .Lle_xneg:
1651         /* Check if y <= x.  */
1652         bltu    yh, xh, 4b
1653         bne     yh, xh, 5f
1654         bgeu    xl, yl, 4b
1655 5:      movi    a2, 1
1656         leaf_return
1657
1658 .Lle_diff_signs:
1659         bltz    xh, 4b
1660
1661         /* Check if both x and y are zero.  */
1662         or      a7, xh, yh
1663         slli    a7, a7, 1
1664         or      a7, a7, xl
1665         or      a7, a7, yl
1666         movi    a2, 1
1667         movi    a3, 0
1668         moveqz  a2, a3, a7
1669         leaf_return
1670
1671
1672         /* Greater Than or Equal */
1673
1674         .align  4
1675         .global __gedf2
1676         .type   __gedf2, @function
1677 __gedf2:
1678         leaf_entry sp, 16
1679         movi    a6, 0x7ff00000
1680         ball    xh, a6, 2f
1681 1:      bnall   yh, a6, .Llt_cmp
1682
1683         /* Check if y is a NaN.  */
1684         slli    a7, yh, 12
1685         or      a7, a7, yl
1686         beqz    a7, .Llt_cmp
1687         movi    a2, -1
1688         leaf_return
1689
1690         /* Check if x is a NaN.  */
1691 2:      slli    a7, xh, 12
1692         or      a7, a7, xl
1693         beqz    a7, 1b
1694         movi    a2, -1
1695         leaf_return
1696
1697
1698         /* Less Than */
1699
1700         .align  4
1701         .global __ltdf2
1702         .type   __ltdf2, @function
1703 __ltdf2:
1704         leaf_entry sp, 16
1705         movi    a6, 0x7ff00000
1706         ball    xh, a6, 2f
1707 1:      bnall   yh, a6, .Llt_cmp
1708
1709         /* Check if y is a NaN.  */
1710         slli    a7, yh, 12
1711         or      a7, a7, yl
1712         beqz    a7, .Llt_cmp
1713         movi    a2, 0
1714         leaf_return
1715
1716         /* Check if x is a NaN.  */
1717 2:      slli    a7, xh, 12
1718         or      a7, a7, xl
1719         beqz    a7, 1b
1720         movi    a2, 0
1721         leaf_return
1722
1723 .Llt_cmp:
1724         /* Check if x and y have different signs.  */
1725         xor     a7, xh, yh
1726         bltz    a7, .Llt_diff_signs
1727
1728         /* Check if x is negative.  */
1729         bltz    xh, .Llt_xneg
1730
1731         /* Check if x < y.  */
1732         bltu    xh, yh, 4f
1733         bne     xh, yh, 5f
1734         bgeu    xl, yl, 5f
1735 4:      movi    a2, -1
1736         leaf_return
1737
1738 .Llt_xneg:
1739         /* Check if y < x.  */
1740         bltu    yh, xh, 4b
1741         bne     yh, xh, 5f
1742         bltu    yl, xl, 4b
1743 5:      movi    a2, 0
1744         leaf_return
1745
1746 .Llt_diff_signs:
1747         bgez    xh, 5b
1748
1749         /* Check if both x and y are nonzero.  */
1750         or      a7, xh, yh
1751         slli    a7, a7, 1
1752         or      a7, a7, xl
1753         or      a7, a7, yl
1754         movi    a2, 0
1755         movi    a3, -1
1756         movnez  a2, a3, a7
1757         leaf_return
1758
1759
1760         /* Unordered */
1761
1762         .align  4
1763         .global __unorddf2
1764         .type   __unorddf2, @function
1765 __unorddf2:
1766         leaf_entry sp, 16
1767         movi    a6, 0x7ff00000
1768         ball    xh, a6, 3f
1769 1:      ball    yh, a6, 4f
1770 2:      movi    a2, 0
1771         leaf_return
1772
1773 3:      slli    a7, xh, 12
1774         or      a7, a7, xl
1775         beqz    a7, 1b
1776         movi    a2, 1
1777         leaf_return
1778
1779 4:      slli    a7, yh, 12
1780         or      a7, a7, yl
1781         beqz    a7, 2b
1782         movi    a2, 1
1783         leaf_return
1784
1785 #endif /* L_cmpdf2 */
1786
1787 #ifdef L_fixdfsi
1788
1789         .align  4
1790         .global __fixdfsi
1791         .type   __fixdfsi, @function
1792 __fixdfsi:
1793         leaf_entry sp, 16
1794
1795         /* Check for NaN and Infinity.  */
1796         movi    a6, 0x7ff00000
1797         ball    xh, a6, .Lfixdfsi_nan_or_inf
1798
1799         /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
1800         extui   a4, xh, 20, 11
1801         extui   a5, a6, 19, 10  /* 0x3fe */
1802         sub     a4, a4, a5
1803         bgei    a4, 32, .Lfixdfsi_maxint
1804         blti    a4, 1, .Lfixdfsi_zero
1805
1806         /* Add explicit "1.0" and shift << 11.  */
1807         or      a7, xh, a6
1808         ssai    (32 - 11)
1809         src     a5, a7, xl
1810
1811         /* Shift back to the right, based on the exponent.  */
1812         ssl     a4              /* shift by 32 - a4 */
1813         srl     a5, a5
1814
1815         /* Negate the result if sign != 0.  */
1816         neg     a2, a5
1817         movgez  a2, a5, a7
1818         leaf_return
1819
1820 .Lfixdfsi_nan_or_inf:
1821         /* Handle Infinity and NaN.  */
1822         slli    a4, xh, 12
1823         or      a4, a4, xl
1824         beqz    a4, .Lfixdfsi_maxint
1825
1826         /* Translate NaN to +maxint.  */
1827         movi    xh, 0
1828
1829 .Lfixdfsi_maxint:
1830         slli    a4, a6, 11      /* 0x80000000 */
1831         addi    a5, a4, -1      /* 0x7fffffff */
1832         movgez  a4, a5, xh
1833         mov     a2, a4
1834         leaf_return
1835
1836 .Lfixdfsi_zero:
1837         movi    a2, 0
1838         leaf_return
1839
1840 #endif /* L_fixdfsi */
1841
1842 #ifdef L_fixdfdi
1843
1844         .align  4
1845         .global __fixdfdi
1846         .type   __fixdfdi, @function
1847 __fixdfdi:
1848         leaf_entry sp, 16
1849
1850         /* Check for NaN and Infinity.  */
1851         movi    a6, 0x7ff00000
1852         ball    xh, a6, .Lfixdfdi_nan_or_inf
1853
1854         /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
1855         extui   a4, xh, 20, 11
1856         extui   a5, a6, 19, 10  /* 0x3fe */
1857         sub     a4, a4, a5
1858         bgei    a4, 64, .Lfixdfdi_maxint
1859         blti    a4, 1, .Lfixdfdi_zero
1860
1861         /* Add explicit "1.0" and shift << 11.  */
1862         or      a7, xh, a6
1863         ssai    (32 - 11)
1864         src     xh, a7, xl
1865         sll     xl, xl
1866
1867         /* Shift back to the right, based on the exponent.  */
1868         ssl     a4              /* shift by 64 - a4 */
1869         bgei    a4, 32, .Lfixdfdi_smallshift
1870         srl     xl, xh
1871         movi    xh, 0
1872
1873 .Lfixdfdi_shifted:
1874         /* Negate the result if sign != 0.  */
1875         bgez    a7, 1f
1876         neg     xl, xl
1877         neg     xh, xh
1878         beqz    xl, 1f
1879         addi    xh, xh, -1
1880 1:      leaf_return
1881
1882 .Lfixdfdi_smallshift:
1883         src     xl, xh, xl
1884         srl     xh, xh
1885         j       .Lfixdfdi_shifted
1886
1887 .Lfixdfdi_nan_or_inf:
1888         /* Handle Infinity and NaN.  */
1889         slli    a4, xh, 12
1890         or      a4, a4, xl
1891         beqz    a4, .Lfixdfdi_maxint
1892
1893         /* Translate NaN to +maxint.  */
1894         movi    xh, 0
1895
1896 .Lfixdfdi_maxint:
1897         slli    a7, a6, 11      /* 0x80000000 */
1898         bgez    xh, 1f
1899         mov     xh, a7
1900         movi    xl, 0
1901         leaf_return
1902
1903 1:      addi    xh, a7, -1      /* 0x7fffffff */
1904         movi    xl, -1
1905         leaf_return
1906
1907 .Lfixdfdi_zero:
1908         movi    xh, 0
1909         movi    xl, 0
1910         leaf_return
1911
1912 #endif /* L_fixdfdi */
1913
1914 #ifdef L_fixunsdfsi
1915
1916         .align  4
1917         .global __fixunsdfsi
1918         .type   __fixunsdfsi, @function
1919 __fixunsdfsi:
1920         leaf_entry sp, 16
1921
1922         /* Check for NaN and Infinity.  */
1923         movi    a6, 0x7ff00000
1924         ball    xh, a6, .Lfixunsdfsi_nan_or_inf
1925
1926         /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
1927         extui   a4, xh, 20, 11
1928         extui   a5, a6, 20, 10  /* 0x3ff */
1929         sub     a4, a4, a5
1930         bgei    a4, 32, .Lfixunsdfsi_maxint
1931         bltz    a4, .Lfixunsdfsi_zero
1932
1933         /* Add explicit "1.0" and shift << 11.  */
1934         or      a7, xh, a6
1935         ssai    (32 - 11)
1936         src     a5, a7, xl
1937
1938         /* Shift back to the right, based on the exponent.  */
1939         addi    a4, a4, 1
1940         beqi    a4, 32, .Lfixunsdfsi_bigexp
1941         ssl     a4              /* shift by 32 - a4 */
1942         srl     a5, a5
1943
1944         /* Negate the result if sign != 0.  */
1945         neg     a2, a5
1946         movgez  a2, a5, a7
1947         leaf_return
1948
1949 .Lfixunsdfsi_nan_or_inf:
1950         /* Handle Infinity and NaN.  */
1951         slli    a4, xh, 12
1952         or      a4, a4, xl
1953         beqz    a4, .Lfixunsdfsi_maxint
1954
1955         /* Translate NaN to 0xffffffff.  */
1956         movi    a2, -1
1957         leaf_return
1958
1959 .Lfixunsdfsi_maxint:
1960         slli    a4, a6, 11      /* 0x80000000 */
1961         movi    a5, -1          /* 0xffffffff */
1962         movgez  a4, a5, xh
1963         mov     a2, a4
1964         leaf_return
1965
1966 .Lfixunsdfsi_zero:
1967         movi    a2, 0
1968         leaf_return
1969
1970 .Lfixunsdfsi_bigexp:
1971         /* Handle unsigned maximum exponent case.  */
1972         bltz    xh, 1f
1973         mov     a2, a5          /* no shift needed */
1974         leaf_return
1975
1976         /* Return 0x80000000 if negative.  */
1977 1:      slli    a2, a6, 11
1978         leaf_return
1979
1980 #endif /* L_fixunsdfsi */
1981
1982 #ifdef L_fixunsdfdi
1983
1984         .align  4
1985         .global __fixunsdfdi
1986         .type   __fixunsdfdi, @function
1987 __fixunsdfdi:
1988         leaf_entry sp, 16
1989
1990         /* Check for NaN and Infinity.  */
1991         movi    a6, 0x7ff00000
1992         ball    xh, a6, .Lfixunsdfdi_nan_or_inf
1993
1994         /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
1995         extui   a4, xh, 20, 11
1996         extui   a5, a6, 20, 10  /* 0x3ff */
1997         sub     a4, a4, a5
1998         bgei    a4, 64, .Lfixunsdfdi_maxint
1999         bltz    a4, .Lfixunsdfdi_zero
2000
2001         /* Add explicit "1.0" and shift << 11.  */
2002         or      a7, xh, a6
2003         ssai    (32 - 11)
2004         src     xh, a7, xl
2005         sll     xl, xl
2006
2007         /* Shift back to the right, based on the exponent.  */
2008         addi    a4, a4, 1
2009         beqi    a4, 64, .Lfixunsdfdi_bigexp
2010         ssl     a4              /* shift by 64 - a4 */
2011         bgei    a4, 32, .Lfixunsdfdi_smallshift
2012         srl     xl, xh
2013         movi    xh, 0
2014
2015 .Lfixunsdfdi_shifted:
2016         /* Negate the result if sign != 0.  */
2017         bgez    a7, 1f
2018         neg     xl, xl
2019         neg     xh, xh
2020         beqz    xl, 1f
2021         addi    xh, xh, -1
2022 1:      leaf_return
2023
2024 .Lfixunsdfdi_smallshift:
2025         src     xl, xh, xl
2026         srl     xh, xh
2027         j       .Lfixunsdfdi_shifted
2028
2029 .Lfixunsdfdi_nan_or_inf:
2030         /* Handle Infinity and NaN.  */
2031         slli    a4, xh, 12
2032         or      a4, a4, xl
2033         beqz    a4, .Lfixunsdfdi_maxint
2034
2035         /* Translate NaN to 0xffffffff.... */
2036 1:      movi    xh, -1
2037         movi    xl, -1
2038         leaf_return
2039
2040 .Lfixunsdfdi_maxint:
2041         bgez    xh, 1b
2042 2:      slli    xh, a6, 11      /* 0x80000000 */
2043         movi    xl, 0
2044         leaf_return
2045
2046 .Lfixunsdfdi_zero:
2047         movi    xh, 0
2048         movi    xl, 0
2049         leaf_return
2050
2051 .Lfixunsdfdi_bigexp:
2052         /* Handle unsigned maximum exponent case.  */
2053         bltz    a7, 2b
2054         leaf_return             /* no shift needed */
2055
2056 #endif /* L_fixunsdfdi */
2057
2058 #ifdef L_floatsidf
2059
2060         .align  4
2061         .global __floatunsidf
2062         .type   __floatunsidf, @function
2063 __floatunsidf:
2064         leaf_entry sp, 16
2065         beqz    a2, .Lfloatsidf_return_zero
2066
2067         /* Set the sign to zero and jump to the floatsidf code.  */
2068         movi    a7, 0
2069         j       .Lfloatsidf_normalize
2070
2071         .align  4
2072         .global __floatsidf
2073         .type   __floatsidf, @function
2074 __floatsidf:
2075         leaf_entry sp, 16
2076
2077         /* Check for zero.  */
2078         beqz    a2, .Lfloatsidf_return_zero
2079
2080         /* Save the sign.  */
2081         extui   a7, a2, 31, 1
2082
2083         /* Get the absolute value.  */
2084 #if XCHAL_HAVE_ABS
2085         abs     a2, a2
2086 #else
2087         neg     a4, a2
2088         movltz  a2, a4, a2
2089 #endif
2090
2091 .Lfloatsidf_normalize:
2092         /* Normalize with the first 1 bit in the msb.  */
2093         do_nsau a4, a2, a5, a6
2094         ssl     a4
2095         sll     a5, a2
2096
2097         /* Shift the mantissa into position.  */
2098         srli    xh, a5, 11
2099         slli    xl, a5, (32 - 11)
2100
2101         /* Set the exponent.  */
2102         movi    a5, 0x41d       /* 0x3fe + 31 */
2103         sub     a5, a5, a4
2104         slli    a5, a5, 20
2105         add     xh, xh, a5
2106
2107         /* Add the sign and return. */
2108         slli    a7, a7, 31
2109         or      xh, xh, a7
2110         leaf_return
2111
2112 .Lfloatsidf_return_zero:
2113         movi    a3, 0
2114         leaf_return
2115
2116 #endif /* L_floatsidf */
2117
2118 #ifdef L_floatdidf
2119
2120         .align  4
2121         .global __floatundidf
2122         .type   __floatundidf, @function
2123 __floatundidf:
2124         leaf_entry sp, 16
2125
2126         /* Check for zero.  */
2127         or      a4, xh, xl
2128         beqz    a4, 2f
2129
2130         /* Set the sign to zero and jump to the floatdidf code.  */
2131         movi    a7, 0
2132         j       .Lfloatdidf_normalize
2133
2134         .align  4
2135         .global __floatdidf
2136         .type   __floatdidf, @function
2137 __floatdidf:
2138         leaf_entry sp, 16
2139
2140         /* Check for zero.  */
2141         or      a4, xh, xl
2142         beqz    a4, 2f
2143
2144         /* Save the sign.  */
2145         extui   a7, xh, 31, 1
2146
2147         /* Get the absolute value.  */
2148         bgez    xh, .Lfloatdidf_normalize
2149         neg     xl, xl
2150         neg     xh, xh
2151         beqz    xl, .Lfloatdidf_normalize
2152         addi    xh, xh, -1
2153
2154 .Lfloatdidf_normalize:
2155         /* Normalize with the first 1 bit in the msb of xh.  */
2156         beqz    xh, .Lfloatdidf_bigshift
2157         do_nsau a4, xh, a5, a6
2158         ssl     a4
2159         src     xh, xh, xl
2160         sll     xl, xl
2161
2162 .Lfloatdidf_shifted:
2163         /* Shift the mantissa into position, with rounding bits in a6.  */
2164         ssai    11
2165         sll     a6, xl
2166         src     xl, xh, xl
2167         srl     xh, xh
2168
2169         /* Set the exponent.  */
2170         movi    a5, 0x43d       /* 0x3fe + 63 */
2171         sub     a5, a5, a4
2172         slli    a5, a5, 20
2173         add     xh, xh, a5
2174
2175         /* Add the sign.  */
2176         slli    a7, a7, 31
2177         or      xh, xh, a7
2178
2179         /* Round up if the leftover fraction is >= 1/2.  */
2180         bgez    a6, 2f
2181         addi    xl, xl, 1
2182         beqz    xl, .Lfloatdidf_roundcarry
2183
2184         /* Check if the leftover fraction is exactly 1/2.  */
2185         slli    a6, a6, 1
2186         beqz    a6, .Lfloatdidf_exactlyhalf
2187 2:      leaf_return
2188
2189 .Lfloatdidf_bigshift:
2190         /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
2191         do_nsau a4, xl, a5, a6
2192         ssl     a4
2193         sll     xh, xl
2194         movi    xl, 0
2195         addi    a4, a4, 32
2196         j       .Lfloatdidf_shifted
2197
2198 .Lfloatdidf_exactlyhalf:
2199         /* Round down to the nearest even value.  */
2200         srli    xl, xl, 1
2201         slli    xl, xl, 1
2202         leaf_return
2203
2204 .Lfloatdidf_roundcarry:
2205         /* xl is always zero when the rounding increment overflows, so
2206            there's no need to round it to an even value.  */
2207         addi    xh, xh, 1
2208         /* Overflow to the exponent is OK.  */
2209         leaf_return
2210
2211 #endif /* L_floatdidf */
2212
2213 #ifdef L_truncdfsf2
2214
2215         .align  4
2216         .global __truncdfsf2
2217         .type   __truncdfsf2, @function
2218 __truncdfsf2:
2219         leaf_entry sp, 16
2220
2221         /* Adjust the exponent bias.  */
2222         movi    a4, (0x3ff - 0x7f) << 20
2223         sub     a5, xh, a4
2224
2225         /* Check for underflow.  */
2226         xor     a6, xh, a5
2227         bltz    a6, .Ltrunc_underflow
2228         extui   a6, a5, 20, 11
2229         beqz    a6, .Ltrunc_underflow
2230
2231         /* Check for overflow.  */
2232         movi    a4, 255
2233         bge     a6, a4, .Ltrunc_overflow
2234
2235         /* Shift a5/xl << 3 into a5/a4.  */
2236         ssai    (32 - 3)
2237         src     a5, a5, xl
2238         sll     a4, xl
2239
2240 .Ltrunc_addsign:
2241         /* Add the sign bit.  */
2242         extui   a6, xh, 31, 1
2243         slli    a6, a6, 31
2244         or      a2, a6, a5
2245
2246         /* Round up if the leftover fraction is >= 1/2.  */
2247         bgez    a4, 1f
2248         addi    a2, a2, 1
2249         /* Overflow to the exponent is OK.  The answer will be correct.  */
2250
2251         /* Check if the leftover fraction is exactly 1/2.  */
2252         slli    a4, a4, 1
2253         beqz    a4, .Ltrunc_exactlyhalf
2254 1:      leaf_return
2255
2256 .Ltrunc_exactlyhalf:
2257         /* Round down to the nearest even value.  */
2258         srli    a2, a2, 1
2259         slli    a2, a2, 1
2260         leaf_return
2261
2262 .Ltrunc_overflow:
2263         /* Check if exponent == 0x7ff.  */
2264         movi    a4, 0x7ff00000
2265         bnall   xh, a4, 1f
2266
2267         /* Check if mantissa is nonzero.  */
2268         slli    a5, xh, 12
2269         or      a5, a5, xl
2270         beqz    a5, 1f
2271
2272         /* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
2273         srli    a4, a4, 1
2274
2275 1:      slli    a4, a4, 4       /* 0xff000000 or 0xff800000 */
2276         /* Add the sign bit.  */
2277         extui   a6, xh, 31, 1
2278         ssai    1
2279         src     a2, a6, a4
2280         leaf_return
2281
2282 .Ltrunc_underflow:
2283         /* Find shift count for a subnormal.  Flush to zero if >= 32.  */
2284         extui   a6, xh, 20, 11
2285         movi    a5, 0x3ff - 0x7f
2286         sub     a6, a5, a6
2287         addi    a6, a6, 1
2288         bgeui   a6, 32, 1f
2289
2290         /* Replace the exponent with an explicit "1.0".  */
2291         slli    a5, a5, 13      /* 0x700000 */
2292         or      a5, a5, xh
2293         slli    a5, a5, 11
2294         srli    a5, a5, 11
2295
2296         /* Shift the mantissa left by 3 bits (into a5/a4).  */
2297         ssai    (32 - 3)
2298         src     a5, a5, xl
2299         sll     a4, xl
2300
2301         /* Shift right by a6.  */
2302         ssr     a6
2303         sll     a7, a4
2304         src     a4, a5, a4
2305         srl     a5, a5
2306         beqz    a7, .Ltrunc_addsign
2307         or      a4, a4, a6      /* any positive, nonzero value will work */
2308         j       .Ltrunc_addsign
2309
2310         /* Return +/- zero.  */
2311 1:      extui   a2, xh, 31, 1
2312         slli    a2, a2, 31
2313         leaf_return
2314
2315 #endif /* L_truncdfsf2 */
2316
2317 #ifdef L_extendsfdf2
2318
2319         .align  4
2320         .global __extendsfdf2
2321         .type   __extendsfdf2, @function
2322 __extendsfdf2:
2323         leaf_entry sp, 16
2324
2325         /* Save the sign bit and then shift it off.  */
2326         extui   a5, a2, 31, 1
2327         slli    a5, a5, 31
2328         slli    a4, a2, 1
2329
2330         /* Extract and check the exponent.  */
2331         extui   a6, a2, 23, 8
2332         beqz    a6, .Lextend_expzero
2333         addi    a6, a6, 1
2334         beqi    a6, 256, .Lextend_nan_or_inf
2335
2336         /* Shift >> 3 into a4/xl.  */
2337         srli    a4, a4, 4
2338         slli    xl, a2, (32 - 3)
2339
2340         /* Adjust the exponent bias.  */
2341         movi    a6, (0x3ff - 0x7f) << 20
2342         add     a4, a4, a6
2343
2344         /* Add the sign bit.  */
2345         or      xh, a4, a5
2346         leaf_return
2347
2348 .Lextend_nan_or_inf:
2349         movi    a4, 0x7ff00000
2350
2351         /* Check for NaN.  */
2352         slli    a7, a2, 9
2353         beqz    a7, 1f
2354
2355         slli    a6, a6, 11      /* 0x80000 */
2356         or      a4, a4, a6
2357
2358         /* Add the sign and return.  */
2359 1:      or      xh, a4, a5
2360         movi    xl, 0
2361         leaf_return
2362
2363 .Lextend_expzero:
2364         beqz    a4, 1b
2365
2366         /* Normalize it to have 8 zero bits before the first 1 bit.  */
2367         do_nsau a7, a4, a2, a3
2368         addi    a7, a7, -8
2369         ssl     a7
2370         sll     a4, a4
2371
2372         /* Shift >> 3 into a4/xl.  */
2373         slli    xl, a4, (32 - 3)
2374         srli    a4, a4, 3
2375
2376         /* Set the exponent.  */
2377         movi    a6, 0x3fe - 0x7f
2378         sub     a6, a6, a7
2379         slli    a6, a6, 20
2380         add     a4, a4, a6
2381
2382         /* Add the sign and return.  */
2383         or      xh, a4, a5
2384         leaf_return
2385
2386 #endif /* L_extendsfdf2 */
2387
2388