gcc/config/xtensa/ieee754-df.S

   1 /* IEEE-754 double-precision functions for Xtensa
   2    Copyright (C) 2006 Free Software Foundation, Inc.
   3    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    In addition to the permissions in the GNU General Public License,
  13    the Free Software Foundation gives you unlimited permission to link
  14    the compiled version of this file into combinations with other
  15    programs, and to distribute those combinations without any
  16    restriction coming from the use of this file.  (The General Public
  17    License restrictions do apply in other respects; for example, they
  18    cover modification of the file, and distribution when not linked
  19    into a combine executable.)
  20
  21    GCC is distributed in the hope that it will be useful, but WITHOUT
  22    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  23    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  24    License for more details.
  25
  26    You should have received a copy of the GNU General Public License
  27    along with GCC; see the file COPYING.  If not, write to the Free
  28    Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  29    02110-1301, USA.  */
  30
  31 #ifdef __XTENSA_EB__
  32 #define xh a2
  33 #define xl a3
  34 #define yh a4
  35 #define yl a5
  36 #else
  37 #define xh a3
  38 #define xl a2
  39 #define yh a5
  40 #define yl a4
  41 #endif
  42
  43 /*  Warning!  The branch displacements for some Xtensa branch instructions
  44     are quite small, and this code has been carefully laid out to keep
  45     branch targets in range.  If you change anything, be sure to check that
  46     the assembler is not relaxing anything to branch over a jump.  */
  47
  48 #ifdef L_negdf2
  49
  50         .align  4
  51         .global __negdf2
  52         .type   __negdf2, @function
  53 __negdf2:
  54         leaf_entry sp, 16
  55         movi    a4, 0x80000000
  56         xor     xh, xh, a4
  57         leaf_return
  58
  59 #endif /* L_negdf2 */
  60
  61 #ifdef L_addsubdf3
  62
  63         /* Addition */
  64 __adddf3_aux:
  65
  66         /* Handle NaNs and Infinities.  (This code is placed before the
  67            start of the function just to keep it in range of the limited
  68            branch displacements.)  */
  69
  70 .Ladd_xnan_or_inf:
  71         /* If y is neither Infinity nor NaN, return x.  */
  72         bnall   yh, a6, 1f
  73         /* If x is a NaN, return it.  Otherwise, return y.  */
  74         slli    a7, xh, 12
  75         or      a7, a7, xl
  76         beqz    a7, .Ladd_ynan_or_inf
  77 1:      leaf_return
  78
  79 .Ladd_ynan_or_inf:
  80         /* Return y.  */
  81         mov     xh, yh
  82         mov     xl, yl
  83         leaf_return
  84
  85 .Ladd_opposite_signs:
  86         /* Operand signs differ.  Do a subtraction.  */
  87         slli    a7, a6, 11
  88         xor     yh, yh, a7
  89         j       .Lsub_same_sign
  90
  91         .align  4
  92         .global __adddf3
  93         .type   __adddf3, @function
  94 __adddf3:
  95         leaf_entry sp, 16
  96         movi    a6, 0x7ff00000
  97
  98         /* Check if the two operands have the same sign.  */
  99         xor     a7, xh, yh
 100         bltz    a7, .Ladd_opposite_signs
 101
 102 .Ladd_same_sign:
 103         /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
 104         ball    xh, a6, .Ladd_xnan_or_inf
 105         ball    yh, a6, .Ladd_ynan_or_inf
 106
 107         /* Compare the exponents.  The smaller operand will be shifted
 108            right by the exponent difference and added to the larger
 109            one.  */
 110         extui   a7, xh, 20, 12
 111         extui   a8, yh, 20, 12
 112         bltu    a7, a8, .Ladd_shiftx
 113
 114 .Ladd_shifty:
 115         /* Check if the smaller (or equal) exponent is zero.  */
 116         bnone   yh, a6, .Ladd_yexpzero
 117
 118         /* Replace yh sign/exponent with 0x001.  */
 119         or      yh, yh, a6
 120         slli    yh, yh, 11
 121         srli    yh, yh, 11
 122
 123 .Ladd_yexpdiff:
 124         /* Compute the exponent difference.  Optimize for difference < 32.  */
 125         sub     a10, a7, a8
 126         bgeui   a10, 32, .Ladd_bigshifty
 127
 128         /* Shift yh/yl right by the exponent difference.  Any bits that are
 129            shifted out of yl are saved in a9 for rounding the result.  */
 130         ssr     a10
 131         movi    a9, 0
 132         src     a9, yl, a9
 133         src     yl, yh, yl
 134         srl     yh, yh
 135
 136 .Ladd_addy:
 137         /* Do the 64-bit addition.  */
 138         add     xl, xl, yl
 139         add     xh, xh, yh
 140         bgeu    xl, yl, 1f
 141         addi    xh, xh, 1
 142 1:
 143         /* Check if the add overflowed into the exponent.  */
 144         extui   a10, xh, 20, 12
 145         beq     a10, a7, .Ladd_round
 146         mov     a8, a7
 147         j       .Ladd_carry
 148
 149 .Ladd_yexpzero:
 150         /* y is a subnormal value.  Replace its sign/exponent with zero,
 151            i.e., no implicit "1.0", and increment the apparent exponent
 152            because subnormals behave as if they had the minimum (nonzero)
 153            exponent.  Test for the case when both exponents are zero.  */
 154         slli    yh, yh, 12
 155         srli    yh, yh, 12
 156         bnone   xh, a6, .Ladd_bothexpzero
 157         addi    a8, a8, 1
 158         j       .Ladd_yexpdiff
 159
 160 .Ladd_bothexpzero:
 161         /* Both exponents are zero.  Handle this as a special case.  There
 162            is no need to shift or round, and the normal code for handling
 163            a carry into the exponent field will not work because it
 164            assumes there is an implicit "1.0" that needs to be added.  */
 165         add     xl, xl, yl
 166         add     xh, xh, yh
 167         bgeu    xl, yl, 1f
 168         addi    xh, xh, 1
 169 1:      leaf_return
 170
 171 .Ladd_bigshifty:
 172         /* Exponent difference > 64 -- just return the bigger value.  */
 173         bgeui   a10, 64, 1b
 174
 175         /* Shift yh/yl right by the exponent difference.  Any bits that are
 176            shifted out are saved in a9 for rounding the result.  */
 177         ssr     a10
 178         sll     a11, yl         /* lost bits shifted out of yl */
 179         src     a9, yh, yl
 180         srl     yl, yh
 181         movi    yh, 0
 182         beqz    a11, .Ladd_addy
 183         or      a9, a9, a10     /* any positive, nonzero value will work */
 184         j       .Ladd_addy
 185
 186 .Ladd_xexpzero:
 187         /* Same as "yexpzero" except skip handling the case when both
 188            exponents are zero.  */
 189         slli    xh, xh, 12
 190         srli    xh, xh, 12
 191         addi    a7, a7, 1
 192         j       .Ladd_xexpdiff
 193
 194 .Ladd_shiftx:
 195         /* Same thing as the "shifty" code, but with x and y swapped.  Also,
 196            because the exponent difference is always nonzero in this version,
 197            the shift sequence can use SLL and skip loading a constant zero.  */
 198         bnone   xh, a6, .Ladd_xexpzero
 199
 200         or      xh, xh, a6
 201         slli    xh, xh, 11
 202         srli    xh, xh, 11
 203
 204 .Ladd_xexpdiff:
 205         sub     a10, a8, a7
 206         bgeui   a10, 32, .Ladd_bigshiftx
 207
 208         ssr     a10
 209         sll     a9, xl
 210         src     xl, xh, xl
 211         srl     xh, xh
 212
 213 .Ladd_addx:
 214         add     xl, xl, yl
 215         add     xh, xh, yh
 216         bgeu    xl, yl, 1f
 217         addi    xh, xh, 1
 218 1:
 219         /* Check if the add overflowed into the exponent.  */
 220         extui   a10, xh, 20, 12
 221         bne     a10, a8, .Ladd_carry
 222
 223 .Ladd_round:
 224         /* Round up if the leftover fraction is >= 1/2.  */
 225         bgez    a9, 1f
 226         addi    xl, xl, 1
 227         beqz    xl, .Ladd_roundcarry
 228
 229         /* Check if the leftover fraction is exactly 1/2.  */
 230         slli    a9, a9, 1
 231         beqz    a9, .Ladd_exactlyhalf
 232 1:      leaf_return
 233
 234 .Ladd_bigshiftx:
 235         /* Mostly the same thing as "bigshifty"....  */
 236         bgeui   a10, 64, .Ladd_returny
 237
 238         ssr     a10
 239         sll     a11, xl
 240         src     a9, xh, xl
 241         srl     xl, xh
 242         movi    xh, 0
 243         beqz    a11, .Ladd_addx
 244         or      a9, a9, a10
 245         j       .Ladd_addx
 246
 247 .Ladd_returny:
 248         mov     xh, yh
 249         mov     xl, yl
 250         leaf_return
 251
 252 .Ladd_carry:
 253         /* The addition has overflowed into the exponent field, so the
 254            value needs to be renormalized.  The mantissa of the result
 255            can be recovered by subtracting the original exponent and
 256            adding 0x100000 (which is the explicit "1.0" for the
 257            mantissa of the non-shifted operand -- the "1.0" for the
 258            shifted operand was already added).  The mantissa can then
 259            be shifted right by one bit.  The explicit "1.0" of the
 260            shifted mantissa then needs to be replaced by the exponent,
 261            incremented by one to account for the normalizing shift.
 262            It is faster to combine these operations: do the shift first
 263            and combine the additions and subtractions.  If x is the
 264            original exponent, the result is:
 265                shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
 266            or:
 267                shifted mantissa + ((x + 1) << 19)
 268            Note that the exponent is incremented here by leaving the
 269            explicit "1.0" of the mantissa in the exponent field.  */
 270
 271         /* Shift xh/xl right by one bit.  Save the lsb of xl.  */
 272         mov     a10, xl
 273         ssai    1
 274         src     xl, xh, xl
 275         srl     xh, xh
 276
 277         /* See explanation above.  The original exponent is in a8.  */
 278         addi    a8, a8, 1
 279         slli    a8, a8, 19
 280         add     xh, xh, a8
 281
 282         /* Return an Infinity if the exponent overflowed.  */
 283         ball    xh, a6, .Ladd_infinity
 284
 285         /* Same thing as the "round" code except the msb of the leftover
 286            fraction is bit 0 of a10, with the rest of the fraction in a9.  */
 287         bbci.l  a10, 0, 1f
 288         addi    xl, xl, 1
 289         beqz    xl, .Ladd_roundcarry
 290         beqz    a9, .Ladd_exactlyhalf
 291 1:      leaf_return
 292
 293 .Ladd_infinity:
 294         /* Clear the mantissa.  */
 295         movi    xl, 0
 296         srli    xh, xh, 20
 297         slli    xh, xh, 20
 298
 299         /* The sign bit may have been lost in a carry-out.  Put it back.  */
 300         slli    a8, a8, 1
 301         or      xh, xh, a8
 302         leaf_return
 303
 304 .Ladd_exactlyhalf:
 305         /* Round down to the nearest even value.  */
 306         srli    xl, xl, 1
 307         slli    xl, xl, 1
 308         leaf_return
 309
 310 .Ladd_roundcarry:
 311         /* xl is always zero when the rounding increment overflows, so
 312            there's no need to round it to an even value.  */
 313         addi    xh, xh, 1
 314         /* Overflow to the exponent is OK.  */
 315         leaf_return
 316
 317
 318         /* Subtraction */
 319 __subdf3_aux:
 320
 321         /* Handle NaNs and Infinities.  (This code is placed before the
 322            start of the function just to keep it in range of the limited
 323            branch displacements.)  */
 324
 325 .Lsub_xnan_or_inf:
 326         /* If y is neither Infinity nor NaN, return x.  */
 327         bnall   yh, a6, 1f
 328         /* Both x and y are either NaN or Inf, so the result is NaN.  */
 329         movi    a4, 0x80000     /* make it a quiet NaN */
 330         or      xh, xh, a4
 331 1:      leaf_return
 332
 333 .Lsub_ynan_or_inf:
 334         /* Negate y and return it.  */
 335         slli    a7, a6, 11
 336         xor     xh, yh, a7
 337         mov     xl, yl
 338         leaf_return
 339
 340 .Lsub_opposite_signs:
 341         /* Operand signs differ.  Do an addition.  */
 342         slli    a7, a6, 11
 343         xor     yh, yh, a7
 344         j       .Ladd_same_sign
 345
 346         .align  4
 347         .global __subdf3
 348         .type   __subdf3, @function
 349 __subdf3:
 350         leaf_entry sp, 16
 351         movi    a6, 0x7ff00000
 352
 353         /* Check if the two operands have the same sign.  */
 354         xor     a7, xh, yh
 355         bltz    a7, .Lsub_opposite_signs
 356
 357 .Lsub_same_sign:
 358         /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
 359         ball    xh, a6, .Lsub_xnan_or_inf
 360         ball    yh, a6, .Lsub_ynan_or_inf
 361
 362         /* Compare the operands.  In contrast to addition, the entire
 363            value matters here.  */
 364         extui   a7, xh, 20, 11
 365         extui   a8, yh, 20, 11
 366         bltu    xh, yh, .Lsub_xsmaller
 367         beq     xh, yh, .Lsub_compare_low
 368
 369 .Lsub_ysmaller:
 370         /* Check if the smaller (or equal) exponent is zero.  */
 371         bnone   yh, a6, .Lsub_yexpzero
 372
 373         /* Replace yh sign/exponent with 0x001.  */
 374         or      yh, yh, a6
 375         slli    yh, yh, 11
 376         srli    yh, yh, 11
 377
 378 .Lsub_yexpdiff:
 379         /* Compute the exponent difference.  Optimize for difference < 32.  */
 380         sub     a10, a7, a8
 381         bgeui   a10, 32, .Lsub_bigshifty
 382
 383         /* Shift yh/yl right by the exponent difference.  Any bits that are
 384            shifted out of yl are saved in a9 for rounding the result.  */
 385         ssr     a10
 386         movi    a9, 0
 387         src     a9, yl, a9
 388         src     yl, yh, yl
 389         srl     yh, yh
 390
 391 .Lsub_suby:
 392         /* Do the 64-bit subtraction.  */
 393         sub     xh, xh, yh
 394         bgeu    xl, yl, 1f
 395         addi    xh, xh, -1
 396 1:      sub     xl, xl, yl
 397
 398         /* Subtract the leftover bits in a9 from zero and propagate any
 399            borrow from xh/xl.  */
 400         neg     a9, a9
 401         beqz    a9, 1f
 402         addi    a5, xh, -1
 403         moveqz  xh, a5, xl
 404         addi    xl, xl, -1
 405 1:
 406         /* Check if the subtract underflowed into the exponent.  */
 407         extui   a10, xh, 20, 11
 408         beq     a10, a7, .Lsub_round
 409         j       .Lsub_borrow
 410
 411 .Lsub_compare_low:
 412         /* The high words are equal.  Compare the low words.  */
 413         bltu    xl, yl, .Lsub_xsmaller
 414         bltu    yl, xl, .Lsub_ysmaller
 415         /* The operands are equal.  Return 0.0.  */
 416         movi    xh, 0
 417         movi    xl, 0
 418 1:      leaf_return
 419
 420 .Lsub_yexpzero:
 421         /* y is a subnormal value.  Replace its sign/exponent with zero,
 422            i.e., no implicit "1.0".  Unless x is also a subnormal, increment
 423            y's apparent exponent because subnormals behave as if they had
 424            the minimum (nonzero) exponent.  */
 425         slli    yh, yh, 12
 426         srli    yh, yh, 12
 427         bnone   xh, a6, .Lsub_yexpdiff
 428         addi    a8, a8, 1
 429         j       .Lsub_yexpdiff
 430
 431 .Lsub_bigshifty:
 432         /* Exponent difference > 64 -- just return the bigger value.  */
 433         bgeui   a10, 64, 1b
 434
 435         /* Shift yh/yl right by the exponent difference.  Any bits that are
 436            shifted out are saved in a9 for rounding the result.  */
 437         ssr     a10
 438         sll     a11, yl         /* lost bits shifted out of yl */
 439         src     a9, yh, yl
 440         srl     yl, yh
 441         movi    yh, 0
 442         beqz    a11, .Lsub_suby
 443         or      a9, a9, a10     /* any positive, nonzero value will work */
 444         j       .Lsub_suby
 445
 446 .Lsub_xsmaller:
 447         /* Same thing as the "ysmaller" code, but with x and y swapped and
 448            with y negated.  */
 449         bnone   xh, a6, .Lsub_xexpzero
 450
 451         or      xh, xh, a6
 452         slli    xh, xh, 11
 453         srli    xh, xh, 11
 454
 455 .Lsub_xexpdiff:
 456         sub     a10, a8, a7
 457         bgeui   a10, 32, .Lsub_bigshiftx
 458
 459         ssr     a10
 460         movi    a9, 0
 461         src     a9, xl, a9
 462         src     xl, xh, xl
 463         srl     xh, xh
 464
 465         /* Negate y.  */
 466         slli    a11, a6, 11
 467         xor     yh, yh, a11
 468
 469 .Lsub_subx:
 470         sub     xl, yl, xl
 471         sub     xh, yh, xh
 472         bgeu    yl, xl, 1f
 473         addi    xh, xh, -1
 474 1:
 475         /* Subtract the leftover bits in a9 from zero and propagate any
 476            borrow from xh/xl.  */
 477         neg     a9, a9
 478         beqz    a9, 1f
 479         addi    a5, xh, -1
 480         moveqz  xh, a5, xl
 481         addi    xl, xl, -1
 482 1:
 483         /* Check if the subtract underflowed into the exponent.  */
 484         extui   a10, xh, 20, 11
 485         bne     a10, a8, .Lsub_borrow
 486
 487 .Lsub_round:
 488         /* Round up if the leftover fraction is >= 1/2.  */
 489         bgez    a9, 1f
 490         addi    xl, xl, 1
 491         beqz    xl, .Lsub_roundcarry
 492
 493         /* Check if the leftover fraction is exactly 1/2.  */
 494         slli    a9, a9, 1
 495         beqz    a9, .Lsub_exactlyhalf
 496 1:      leaf_return
 497
 498 .Lsub_xexpzero:
 499         /* Same as "yexpzero".  */
 500         slli    xh, xh, 12
 501         srli    xh, xh, 12
 502         bnone   yh, a6, .Lsub_xexpdiff
 503         addi    a7, a7, 1
 504         j       .Lsub_xexpdiff
 505
 506 .Lsub_bigshiftx:
 507         /* Mostly the same thing as "bigshifty", but with the sign bit of the
 508            shifted value set so that the subsequent subtraction flips the
 509            sign of y.  */
 510         bgeui   a10, 64, .Lsub_returny
 511
 512         ssr     a10
 513         sll     a11, xl
 514         src     a9, xh, xl
 515         srl     xl, xh
 516         slli    xh, a6, 11      /* set sign bit of xh */
 517         beqz    a11, .Lsub_subx
 518         or      a9, a9, a10
 519         j       .Lsub_subx
 520
 521 .Lsub_returny:
 522         /* Negate and return y.  */
 523         slli    a7, a6, 11
 524         xor     xh, yh, a7
 525         mov     xl, yl
 526         leaf_return
 527
 528 .Lsub_borrow:
 529         /* The subtraction has underflowed into the exponent field, so the
 530            value needs to be renormalized.  Shift the mantissa left as
 531            needed to remove any leading zeros and adjust the exponent
 532            accordingly.  If the exponent is not large enough to remove
 533            all the leading zeros, the result will be a subnormal value.  */
 534
 535         slli    a8, xh, 12
 536         beqz    a8, .Lsub_xhzero
 537         do_nsau a6, a8, a7, a11
 538         srli    a8, a8, 12
 539         bge     a6, a10, .Lsub_subnormal
 540         addi    a6, a6, 1
 541
 542 .Lsub_shift_lt32:
 543         /* Shift the mantissa (a8/xl/a9) left by a6.  */
 544         ssl     a6
 545         src     a8, a8, xl
 546         src     xl, xl, a9
 547         sll     a9, a9
 548
 549         /* Combine the shifted mantissa with the sign and exponent,
 550            decrementing the exponent by a6.  (The exponent has already
 551            been decremented by one due to the borrow from the subtraction,
 552            but adding the mantissa will increment the exponent by one.)  */
 553         srli    xh, xh, 20
 554         sub     xh, xh, a6
 555         slli    xh, xh, 20
 556         add     xh, xh, a8
 557         j       .Lsub_round
 558
 559 .Lsub_exactlyhalf:
 560         /* Round down to the nearest even value.  */
 561         srli    xl, xl, 1
 562         slli    xl, xl, 1
 563         leaf_return
 564
 565 .Lsub_roundcarry:
 566         /* xl is always zero when the rounding increment overflows, so
 567            there's no need to round it to an even value.  */
 568         addi    xh, xh, 1
 569         /* Overflow to the exponent is OK.  */
 570         leaf_return
 571
 572 .Lsub_xhzero:
 573         /* When normalizing the result, all the mantissa bits in the high
 574            word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
 575         do_nsau a6, xl, a7, a11
 576         addi    a6, a6, 21
 577         blt     a10, a6, .Lsub_subnormal
 578
 579 .Lsub_normalize_shift:
 580         bltui   a6, 32, .Lsub_shift_lt32
 581
 582         ssl     a6
 583         src     a8, xl, a9
 584         sll     xl, a9
 585         movi    a9, 0
 586
 587         srli    xh, xh, 20
 588         sub     xh, xh, a6
 589         slli    xh, xh, 20
 590         add     xh, xh, a8
 591         j       .Lsub_round
 592
 593 .Lsub_subnormal:
 594         /* The exponent is too small to shift away all the leading zeros.
 595            Set a6 to the current exponent (which has already been
 596            decremented by the borrow) so that the exponent of the result
 597            will be zero.  Do not add 1 to a6 in this case, because: (1)
 598            adding the mantissa will not increment the exponent, so there is
 599            no need to subtract anything extra from the exponent to
 600            compensate, and (2) the effective exponent of a subnormal is 1
 601            not 0 so the shift amount must be 1 smaller than normal. */
 602         mov     a6, a10
 603         j       .Lsub_normalize_shift
 604
 605 #endif /* L_addsubdf3 */
 606
 607 #ifdef L_muldf3
 608
 609         /* Multiplication */
 610 __muldf3_aux:
 611
 612         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
 613            (This code is placed before the start of the function just to
 614            keep it in range of the limited branch displacements.)  */
 615
 616 .Lmul_xexpzero:
 617         /* Clear the sign bit of x.  */
 618         slli    xh, xh, 1
 619         srli    xh, xh, 1
 620
 621         /* If x is zero, return zero.  */
 622         or      a10, xh, xl
 623         beqz    a10, .Lmul_return_zero
 624
 625         /* Normalize x.  Adjust the exponent in a8.  */
 626         beqz    xh, .Lmul_xh_zero
 627         do_nsau a10, xh, a11, a12
 628         addi    a10, a10, -11
 629         ssl     a10
 630         src     xh, xh, xl
 631         sll     xl, xl
 632         movi    a8, 1
 633         sub     a8, a8, a10
 634         j       .Lmul_xnormalized
 635 .Lmul_xh_zero:
 636         do_nsau a10, xl, a11, a12
 637         addi    a10, a10, -11
 638         movi    a8, -31
 639         sub     a8, a8, a10
 640         ssl     a10
 641         bltz    a10, .Lmul_xl_srl
 642         sll     xh, xl
 643         movi    xl, 0
 644         j       .Lmul_xnormalized
 645 .Lmul_xl_srl:
 646         srl     xh, xl
 647         sll     xl, xl
 648         j       .Lmul_xnormalized
 649
 650 .Lmul_yexpzero:
 651         /* Clear the sign bit of y.  */
 652         slli    yh, yh, 1
 653         srli    yh, yh, 1
 654
 655         /* If y is zero, return zero.  */
 656         or      a10, yh, yl
 657         beqz    a10, .Lmul_return_zero
 658
 659         /* Normalize y.  Adjust the exponent in a9.  */
 660         beqz    yh, .Lmul_yh_zero
 661         do_nsau a10, yh, a11, a12
 662         addi    a10, a10, -11
 663         ssl     a10
 664         src     yh, yh, yl
 665         sll     yl, yl
 666         movi    a9, 1
 667         sub     a9, a9, a10
 668         j       .Lmul_ynormalized
 669 .Lmul_yh_zero:
 670         do_nsau a10, yl, a11, a12
 671         addi    a10, a10, -11
 672         movi    a9, -31
 673         sub     a9, a9, a10
 674         ssl     a10
 675         bltz    a10, .Lmul_yl_srl
 676         sll     yh, yl
 677         movi    yl, 0
 678         j       .Lmul_ynormalized
 679 .Lmul_yl_srl:
 680         srl     yh, yl
 681         sll     yl, yl
 682         j       .Lmul_ynormalized
 683
 684 .Lmul_return_zero:
 685         /* Return zero with the appropriate sign bit.  */
 686         srli    xh, a7, 31
 687         slli    xh, xh, 31
 688         movi    xl, 0
 689         j       .Lmul_done
 690
 691 .Lmul_xnan_or_inf:
 692         /* If y is zero, return NaN.  */
 693         bnez    yl, 1f
 694         slli    a8, yh, 1
 695         bnez    a8, 1f
 696         movi    a4, 0x80000     /* make it a quiet NaN */
 697         or      xh, xh, a4
 698         j       .Lmul_done
 699 1:
 700         /* If y is NaN, return y.  */
 701         bnall   yh, a6, .Lmul_returnx
 702         slli    a8, yh, 12
 703         or      a8, a8, yl
 704         beqz    a8, .Lmul_returnx
 705
 706 .Lmul_returny:
 707         mov     xh, yh
 708         mov     xl, yl
 709
 710 .Lmul_returnx:
 711         /* Set the sign bit and return.  */
 712         extui   a7, a7, 31, 1
 713         slli    xh, xh, 1
 714         ssai    1
 715         src     xh, a7, xh
 716         j       .Lmul_done
 717
 718 .Lmul_ynan_or_inf:
 719         /* If x is zero, return NaN.  */
 720         bnez    xl, .Lmul_returny
 721         slli    a8, xh, 1
 722         bnez    a8, .Lmul_returny
 723         movi    a7, 0x80000     /* make it a quiet NaN */
 724         or      xh, yh, a7
 725         j       .Lmul_done
 726
 727         .align  4
 728         .global __muldf3
 729         .type   __muldf3, @function
 730 __muldf3:
 731         leaf_entry sp, 32
 732 #if __XTENSA_CALL0_ABI__
 733         addi    sp, sp, -32
 734         s32i    a12, sp, 16
 735         s32i    a13, sp, 20
 736         s32i    a14, sp, 24
 737         s32i    a15, sp, 28
 738 #endif
 739         movi    a6, 0x7ff00000
 740
 741         /* Get the sign of the result.  */
 742         xor     a7, xh, yh
 743
 744         /* Check for NaN and infinity.  */
 745         ball    xh, a6, .Lmul_xnan_or_inf
 746         ball    yh, a6, .Lmul_ynan_or_inf
 747
 748         /* Extract the exponents.  */
 749         extui   a8, xh, 20, 11
 750         extui   a9, yh, 20, 11
 751
 752         beqz    a8, .Lmul_xexpzero
 753 .Lmul_xnormalized:
 754         beqz    a9, .Lmul_yexpzero
 755 .Lmul_ynormalized:
 756
 757         /* Add the exponents.  */
 758         add     a8, a8, a9
 759
 760         /* Replace sign/exponent fields with explicit "1.0".  */
 761         movi    a10, 0x1fffff
 762         or      xh, xh, a6
 763         and     xh, xh, a10
 764         or      yh, yh, a6
 765         and     yh, yh, a10
 766
 767         /* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
 768            The least-significant word of the result is thrown away except
 769            that if it is nonzero, the lsb of a6 is set to 1.  */
 770 #if XCHAL_HAVE_MUL32_HIGH
 771
 772         /* Compute a6 with any carry-outs in a10.  */
 773         movi    a10, 0
 774         mull    a6, xl, yh
 775         mull    a11, xh, yl
 776         add     a6, a6, a11
 777         bgeu    a6, a11, 1f
 778         addi    a10, a10, 1
 779 1:
 780         muluh   a11, xl, yl
 781         add     a6, a6, a11
 782         bgeu    a6, a11, 1f
 783         addi    a10, a10, 1
 784 1:
 785         /* If the low word of the result is nonzero, set the lsb of a6.  */
 786         mull    a11, xl, yl
 787         beqz    a11, 1f
 788         movi    a9, 1
 789         or      a6, a6, a9
 790 1:
 791         /* Compute xl with any carry-outs in a9.  */
 792         movi    a9, 0
 793         mull    a11, xh, yh
 794         add     a10, a10, a11
 795         bgeu    a10, a11, 1f
 796         addi    a9, a9, 1
 797 1:
 798         muluh   a11, xh, yl
 799         add     a10, a10, a11
 800         bgeu    a10, a11, 1f
 801         addi    a9, a9, 1
 802 1:
 803         muluh   xl, xl, yh
 804         add     xl, xl, a10
 805         bgeu    xl, a10, 1f
 806         addi    a9, a9, 1
 807 1:
 808         /* Compute xh.  */
 809         muluh   xh, xh, yh
 810         add     xh, xh, a9
 811
 812 #else
 813
 814         /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
 815            products.  These partial products are:
 816
 817                 0 xll * yll
 818
 819                 1 xll * ylh
 820                 2 xlh * yll
 821
 822                 3 xll * yhl
 823                 4 xlh * ylh
 824                 5 xhl * yll
 825
 826                 6 xll * yhh
 827                 7 xlh * yhl
 828                 8 xhl * ylh
 829                 9 xhh * yll
 830
 831                 10 xlh * yhh
 832                 11 xhl * yhl
 833                 12 xhh * ylh
 834
 835                 13 xhl * yhh
 836                 14 xhh * yhl
 837
 838                 15 xhh * yhh
 839
 840            where the input chunks are (hh, hl, lh, ll).  If using the Mul16
 841            or Mul32 multiplier options, these input chunks must be stored in
 842            separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
 843            that the inputs come from either half of the registers, so there
 844            is no need to shift them out ahead of time.  If there is no
 845            multiply hardware, the 16-bit chunks can be extracted when setting
 846            up the arguments to the separate multiply function.  */
 847
 848         /* Save a7 since it is needed to hold a temporary value.  */
 849         s32i    a7, sp, 4
 850 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
 851         /* Calling a separate multiply function will clobber a0 and requires
 852            use of a8 as a temporary, so save those values now.  (The function
 853            uses a custom ABI so nothing else needs to be saved.)  */
 854         s32i    a0, sp, 0
 855         s32i    a8, sp, 8
 856 #endif
 857
 858 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
 859
 860 #define xlh a12
 861 #define ylh a13
 862 #define xhh a14
 863 #define yhh a15
 864
 865         /* Get the high halves of the inputs into registers.  */
 866         srli    xlh, xl, 16
 867         srli    ylh, yl, 16
 868         srli    xhh, xh, 16
 869         srli    yhh, yh, 16
 870
 871 #define xll xl
 872 #define yll yl
 873 #define xhl xh
 874 #define yhl yh
 875
 876 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
 877         /* Clear the high halves of the inputs.  This does not matter
 878            for MUL16 because the high bits are ignored.  */
 879         extui   xl, xl, 0, 16
 880         extui   xh, xh, 0, 16
 881         extui   yl, yl, 0, 16
 882         extui   yh, yh, 0, 16
 883 #endif
 884 #endif /* MUL16 || MUL32 */
 885
 886
 887 #if XCHAL_HAVE_MUL16
 888
 889 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 890         mul16u  dst, xreg ## xhalf, yreg ## yhalf
 891
 892 #elif XCHAL_HAVE_MUL32
 893
 894 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 895         mull    dst, xreg ## xhalf, yreg ## yhalf
 896
 897 #elif XCHAL_HAVE_MAC16
 898
 899 /* The preprocessor insists on inserting a space when concatenating after
 900    a period in the definition of do_mul below.  These macros are a workaround
 901    using underscores instead of periods when doing the concatenation.  */
 902 #define umul_aa_ll umul.aa.ll
 903 #define umul_aa_lh umul.aa.lh
 904 #define umul_aa_hl umul.aa.hl
 905 #define umul_aa_hh umul.aa.hh
 906
 907 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 908         umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
 909         rsr     dst, ACCLO
 910
 911 #else /* no multiply hardware */
 912
 913 #define set_arg_l(dst, src) \
 914         extui   dst, src, 0, 16
 915 #define set_arg_h(dst, src) \
 916         srli    dst, src, 16
 917
 918 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 919         set_arg_ ## xhalf (a13, xreg); \
 920         set_arg_ ## yhalf (a14, yreg); \
 921         call0   .Lmul_mulsi3; \
 922         mov     dst, a12
 923 #endif
 924
 925         /* Add pp1 and pp2 into a10 with carry-out in a9.  */
 926         do_mul(a10, xl, l, yl, h)       /* pp 1 */
 927         do_mul(a11, xl, h, yl, l)       /* pp 2 */
 928         movi    a9, 0
 929         add     a10, a10, a11
 930         bgeu    a10, a11, 1f
 931         addi    a9, a9, 1
 932 1:
 933         /* Initialize a6 with a9/a10 shifted into position.  Note that
 934            this value can be safely incremented without any carry-outs.  */
 935         ssai    16
 936         src     a6, a9, a10
 937
 938         /* Compute the low word into a10.  */
 939         do_mul(a11, xl, l, yl, l)       /* pp 0 */
 940         sll     a10, a10
 941         add     a10, a10, a11
 942         bgeu    a10, a11, 1f
 943         addi    a6, a6, 1
 944 1:
 945         /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
 946            This is good enough to determine the low half of a6, so that any
 947            nonzero bits from the low word of the result can be collapsed
 948            into a6, freeing up a register.  */
 949         movi    a9, 0
 950         do_mul(a11, xl, l, yh, l)       /* pp 3 */
 951         add     a6, a6, a11
 952         bgeu    a6, a11, 1f
 953         addi    a9, a9, 1
 954 1:
 955         do_mul(a11, xl, h, yl, h)       /* pp 4 */
 956         add     a6, a6, a11
 957         bgeu    a6, a11, 1f
 958         addi    a9, a9, 1
 959 1:
 960         do_mul(a11, xh, l, yl, l)       /* pp 5 */
 961         add     a6, a6, a11
 962         bgeu    a6, a11, 1f
 963         addi    a9, a9, 1
 964 1:
 965         /* Collapse any nonzero bits from the low word into a6.  */
 966         beqz    a10, 1f
 967         movi    a11, 1
 968         or      a6, a6, a11
 969 1:
 970         /* Add pp6-9 into a11 with carry-outs in a10.  */
 971         do_mul(a7, xl, l, yh, h)        /* pp 6 */
 972         do_mul(a11, xh, h, yl, l)       /* pp 9 */
 973         movi    a10, 0
 974         add     a11, a11, a7
 975         bgeu    a11, a7, 1f
 976         addi    a10, a10, 1
 977 1:
 978         do_mul(a7, xl, h, yh, l)        /* pp 7 */
 979         add     a11, a11, a7
 980         bgeu    a11, a7, 1f
 981         addi    a10, a10, 1
 982 1:
 983         do_mul(a7, xh, l, yl, h)        /* pp 8 */
 984         add     a11, a11, a7
 985         bgeu    a11, a7, 1f
 986         addi    a10, a10, 1
 987 1:
 988         /* Shift a10/a11 into position, and add low half of a11 to a6.  */
 989         src     a10, a10, a11
 990         add     a10, a10, a9
 991         sll     a11, a11
 992         add     a6, a6, a11
 993         bgeu    a6, a11, 1f
 994         addi    a10, a10, 1
 995 1:
 996         /* Add pp10-12 into xl with carry-outs in a9.  */
 997         movi    a9, 0
 998         do_mul(xl, xl, h, yh, h)        /* pp 10 */
 999         add     xl, xl, a10
1000         bgeu    xl, a10, 1f
1001         addi    a9, a9, 1
1002 1:
1003         do_mul(a10, xh, l, yh, l)       /* pp 11 */
1004         add     xl, xl, a10
1005         bgeu    xl, a10, 1f
1006         addi    a9, a9, 1
1007 1:
1008         do_mul(a10, xh, h, yl, h)       /* pp 12 */
1009         add     xl, xl, a10
1010         bgeu    xl, a10, 1f
1011         addi    a9, a9, 1
1012 1:
1013         /* Add pp13-14 into a11 with carry-outs in a10.  */
1014         do_mul(a11, xh, l, yh, h)       /* pp 13 */
1015         do_mul(a7, xh, h, yh, l)        /* pp 14 */
1016         movi    a10, 0
1017         add     a11, a11, a7
1018         bgeu    a11, a7, 1f
1019         addi    a10, a10, 1
1020 1:
1021         /* Shift a10/a11 into position, and add low half of a11 to a6.  */
1022         src     a10, a10, a11
1023         add     a10, a10, a9
1024         sll     a11, a11
1025         add     xl, xl, a11
1026         bgeu    xl, a11, 1f
1027         addi    a10, a10, 1
1028 1:
1029         /* Compute xh.  */
1030         do_mul(xh, xh, h, yh, h)        /* pp 15 */
1031         add     xh, xh, a10
1032
1033         /* Restore values saved on the stack during the multiplication.  */
1034         l32i    a7, sp, 4
1035 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
1036         l32i    a0, sp, 0
1037         l32i    a8, sp, 8
1038 #endif
1039 #endif
1040
1041         /* Shift left by 12 bits, unless there was a carry-out from the
1042            multiply, in which case, shift by 11 bits and increment the
1043            exponent.  Note: It is convenient to use the constant 0x3ff
1044            instead of 0x400 when removing the extra exponent bias (so that
1045            it is easy to construct 0x7fe for the overflow check).  Reverse
1046            the logic here to decrement the exponent sum by one unless there
1047            was a carry-out.  */
1048         movi    a4, 11
1049         srli    a5, xh, 21 - 12
1050         bnez    a5, 1f
1051         addi    a4, a4, 1
1052         addi    a8, a8, -1
1053 1:      ssl     a4
1054         src     xh, xh, xl
1055         src     xl, xl, a6
1056         sll     a6, a6
1057
1058         /* Subtract the extra bias from the exponent sum (plus one to account
1059            for the explicit "1.0" of the mantissa that will be added to the
1060            exponent in the final result).  */
1061         movi    a4, 0x3ff
1062         sub     a8, a8, a4
1063
1064         /* Check for over/underflow.  The value in a8 is one less than the
1065            final exponent, so values in the range 0..7fd are OK here.  */
1066         slli    a4, a4, 1       /* 0x7fe */
1067         bgeu    a8, a4, .Lmul_overflow
1068
1069 .Lmul_round:
1070         /* Round.  */
1071         bgez    a6, .Lmul_rounded
1072         addi    xl, xl, 1
1073         beqz    xl, .Lmul_roundcarry
1074         slli    a6, a6, 1
1075         beqz    a6, .Lmul_exactlyhalf
1076
1077 .Lmul_rounded:
1078         /* Add the exponent to the mantissa.  */
1079         slli    a8, a8, 20
1080         add     xh, xh, a8
1081
1082 .Lmul_addsign:
1083         /* Add the sign bit.  */
1084         srli    a7, a7, 31
1085         slli    a7, a7, 31
1086         or      xh, xh, a7
1087
1088 .Lmul_done:
1089 #if __XTENSA_CALL0_ABI__
1090         l32i    a12, sp, 16
1091         l32i    a13, sp, 20
1092         l32i    a14, sp, 24
1093         l32i    a15, sp, 28
1094         addi    sp, sp, 32
1095 #endif
1096         leaf_return
1097
1098 .Lmul_exactlyhalf:
1099         /* Round down to the nearest even value.  */
1100         srli    xl, xl, 1
1101         slli    xl, xl, 1
1102         j       .Lmul_rounded
1103
1104 .Lmul_roundcarry:
1105         /* xl is always zero when the rounding increment overflows, so
1106            there's no need to round it to an even value.  */
1107         addi    xh, xh, 1
1108         /* Overflow is OK -- it will be added to the exponent.  */
1109         j       .Lmul_rounded
1110
1111 .Lmul_overflow:
1112         bltz    a8, .Lmul_underflow
1113         /* Return +/- Infinity.  */
1114         addi    a8, a4, 1       /* 0x7ff */
1115         slli    xh, a8, 20
1116         movi    xl, 0
1117         j       .Lmul_addsign
1118
1119 .Lmul_underflow:
1120         /* Create a subnormal value, where the exponent field contains zero,
1121            but the effective exponent is 1.  The value of a8 is one less than
1122            the actual exponent, so just negate it to get the shift amount.  */
1123         neg     a8, a8
1124         mov     a9, a6
1125         ssr     a8
1126         bgeui   a8, 32, .Lmul_bigshift
1127
1128         /* Shift xh/xl right.  Any bits that are shifted out of xl are saved
1129            in a6 (combined with the shifted-out bits currently in a6) for
1130            rounding the result.  */
1131         sll     a6, xl
1132         src     xl, xh, xl
1133         srl     xh, xh
1134         j       1f
1135
1136 .Lmul_bigshift:
1137         bgeui   a8, 64, .Lmul_flush_to_zero
1138         sll     a10, xl         /* lost bits shifted out of xl */
1139         src     a6, xh, xl
1140         srl     xl, xh
1141         movi    xh, 0
1142         or      a9, a9, a10
1143
1144         /* Set the exponent to zero.  */
1145 1:      movi    a8, 0
1146
1147         /* Pack any nonzero bits shifted out into a6.  */
1148         beqz    a9, .Lmul_round
1149         movi    a9, 1
1150         or      a6, a6, a9
1151         j       .Lmul_round
1152
1153 .Lmul_flush_to_zero:
1154         /* Return zero with the appropriate sign bit.  */
1155         srli    xh, a7, 31
1156         slli    xh, xh, 31
1157         movi    xl, 0
1158         j       .Lmul_done
1159
1160 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
1161
1162         /* For Xtensa processors with no multiply hardware, this simplified
1163            version of _mulsi3 is used for multiplying 16-bit chunks of
1164            the floating-point mantissas.  It uses a custom ABI: the inputs
1165            are passed in a13 and a14, the result is returned in a12, and
1166            a8 and a15 are clobbered.  */
1167         .align  4
1168 .Lmul_mulsi3:
1169         movi    a12, 0
1170 .Lmul_mult_loop:
1171         add     a15, a14, a12
1172         extui   a8, a13, 0, 1
1173         movnez  a12, a15, a8
1174
1175         do_addx2 a15, a14, a12, a15
1176         extui   a8, a13, 1, 1
1177         movnez  a12, a15, a8
1178
1179         do_addx4 a15, a14, a12, a15
1180         extui   a8, a13, 2, 1
1181         movnez  a12, a15, a8
1182
1183         do_addx8 a15, a14, a12, a15
1184         extui   a8, a13, 3, 1
1185         movnez  a12, a15, a8
1186
1187         srli    a13, a13, 4
1188         slli    a14, a14, 4
1189         bnez    a13, .Lmul_mult_loop
1190         ret
1191 #endif /* !MUL16 && !MUL32 && !MAC16 */
1192 #endif /* L_muldf3 */
1193
1194 #ifdef L_divdf3
1195
1196         /* Division */
1197 __divdf3_aux:
1198
1199         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
1200            (This code is placed before the start of the function just to
1201            keep it in range of the limited branch displacements.)  */
1202
1203 .Ldiv_yexpzero:
1204         /* Clear the sign bit of y.  */
1205         slli    yh, yh, 1
1206         srli    yh, yh, 1
1207
1208         /* Check for division by zero.  */
1209         or      a10, yh, yl
1210         beqz    a10, .Ldiv_yzero
1211
1212         /* Normalize y.  Adjust the exponent in a9.  */
1213         beqz    yh, .Ldiv_yh_zero
1214         do_nsau a10, yh, a11, a9
1215         addi    a10, a10, -11
1216         ssl     a10
1217         src     yh, yh, yl
1218         sll     yl, yl
1219         movi    a9, 1
1220         sub     a9, a9, a10
1221         j       .Ldiv_ynormalized
1222 .Ldiv_yh_zero:
1223         do_nsau a10, yl, a11, a9
1224         addi    a10, a10, -11
1225         movi    a9, -31
1226         sub     a9, a9, a10
1227         ssl     a10
1228         bltz    a10, .Ldiv_yl_srl
1229         sll     yh, yl
1230         movi    yl, 0
1231         j       .Ldiv_ynormalized
1232 .Ldiv_yl_srl:
1233         srl     yh, yl
1234         sll     yl, yl
1235         j       .Ldiv_ynormalized
1236
1237 .Ldiv_yzero:
1238         /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
1239         slli    xh, xh, 1
1240         srli    xh, xh, 1
1241         or      xl, xl, xh
1242         srli    xh, a7, 31
1243         slli    xh, xh, 31
1244         or      xh, xh, a6
1245         bnez    xl, 1f
1246         movi    a4, 0x80000     /* make it a quiet NaN */
1247         or      xh, xh, a4
1248 1:      movi    xl, 0
1249         leaf_return
1250
1251 .Ldiv_xexpzero:
1252         /* Clear the sign bit of x.  */
1253         slli    xh, xh, 1
1254         srli    xh, xh, 1
1255
1256         /* If x is zero, return zero.  */
1257         or      a10, xh, xl
1258         beqz    a10, .Ldiv_return_zero
1259
1260         /* Normalize x.  Adjust the exponent in a8.  */
1261         beqz    xh, .Ldiv_xh_zero
1262         do_nsau a10, xh, a11, a8
1263         addi    a10, a10, -11
1264         ssl     a10
1265         src     xh, xh, xl
1266         sll     xl, xl
1267         movi    a8, 1
1268         sub     a8, a8, a10
1269         j       .Ldiv_xnormalized
1270 .Ldiv_xh_zero:
1271         do_nsau a10, xl, a11, a8
1272         addi    a10, a10, -11
1273         movi    a8, -31
1274         sub     a8, a8, a10
1275         ssl     a10
1276         bltz    a10, .Ldiv_xl_srl
1277         sll     xh, xl
1278         movi    xl, 0
1279         j       .Ldiv_xnormalized
1280 .Ldiv_xl_srl:
1281         srl     xh, xl
1282         sll     xl, xl
1283         j       .Ldiv_xnormalized
1284
1285 .Ldiv_return_zero:
1286         /* Return zero with the appropriate sign bit.  */
1287         srli    xh, a7, 31
1288         slli    xh, xh, 31
1289         movi    xl, 0
1290         leaf_return
1291
1292 .Ldiv_xnan_or_inf:
1293         /* Set the sign bit of the result.  */
1294         srli    a7, yh, 31
1295         slli    a7, a7, 31
1296         xor     xh, xh, a7
1297         /* If y is NaN or Inf, return NaN.  */
1298         bnall   yh, a6, 1f
1299         movi    a4, 0x80000     /* make it a quiet NaN */
1300         or      xh, xh, a4
1301 1:      leaf_return
1302
1303 .Ldiv_ynan_or_inf:
1304         /* If y is Infinity, return zero.  */
1305         slli    a8, yh, 12
1306         or      a8, a8, yl
1307         beqz    a8, .Ldiv_return_zero
1308         /* y is NaN; return it.  */
1309         mov     xh, yh
1310         mov     xl, yl
1311         leaf_return
1312
1313 .Ldiv_highequal1:
1314         bltu    xl, yl, 2f
1315         j       3f
1316
1317         .align  4
1318         .global __divdf3
1319         .type   __divdf3, @function
1320 __divdf3:
1321         leaf_entry sp, 16
1322         movi    a6, 0x7ff00000
1323
1324         /* Get the sign of the result.  */
1325         xor     a7, xh, yh
1326
1327         /* Check for NaN and infinity.  */
1328         ball    xh, a6, .Ldiv_xnan_or_inf
1329         ball    yh, a6, .Ldiv_ynan_or_inf
1330
1331         /* Extract the exponents.  */
1332         extui   a8, xh, 20, 11
1333         extui   a9, yh, 20, 11
1334
1335         beqz    a9, .Ldiv_yexpzero
1336 .Ldiv_ynormalized:
1337         beqz    a8, .Ldiv_xexpzero
1338 .Ldiv_xnormalized:
1339
1340         /* Subtract the exponents.  */
1341         sub     a8, a8, a9
1342
1343         /* Replace sign/exponent fields with explicit "1.0".  */
1344         movi    a10, 0x1fffff
1345         or      xh, xh, a6
1346         and     xh, xh, a10
1347         or      yh, yh, a6
1348         and     yh, yh, a10
1349
1350         /* Set SAR for left shift by one.  */
1351         ssai    (32 - 1)
1352
1353         /* The first digit of the mantissa division must be a one.
1354            Shift x (and adjust the exponent) as needed to make this true.  */
1355         bltu    yh, xh, 3f
1356         beq     yh, xh, .Ldiv_highequal1
1357 2:      src     xh, xh, xl
1358         sll     xl, xl
1359         addi    a8, a8, -1
1360 3:
1361         /* Do the first subtraction and shift.  */
1362         sub     xh, xh, yh
1363         bgeu    xl, yl, 1f
1364         addi    xh, xh, -1
1365 1:      sub     xl, xl, yl
1366         src     xh, xh, xl
1367         sll     xl, xl
1368
1369         /* Put the quotient into a10/a11.  */
1370         movi    a10, 0
1371         movi    a11, 1
1372
1373         /* Divide one bit at a time for 52 bits.  */
1374         movi    a9, 52
1375 #if XCHAL_HAVE_LOOPS
1376         loop    a9, .Ldiv_loopend
1377 #endif
1378 .Ldiv_loop:
1379         /* Shift the quotient << 1.  */
1380         src     a10, a10, a11
1381         sll     a11, a11
1382
1383         /* Is this digit a 0 or 1?  */
1384         bltu    xh, yh, 3f
1385         beq     xh, yh, .Ldiv_highequal2
1386
1387         /* Output a 1 and subtract.  */
1388 2:      addi    a11, a11, 1
1389         sub     xh, xh, yh
1390         bgeu    xl, yl, 1f
1391         addi    xh, xh, -1
1392 1:      sub     xl, xl, yl
1393
1394         /* Shift the dividend << 1.  */
1395 3:      src     xh, xh, xl
1396         sll     xl, xl
1397
1398 #if !XCHAL_HAVE_LOOPS
1399         addi    a9, a9, -1
1400         bnez    a9, .Ldiv_loop
1401 #endif
1402 .Ldiv_loopend:
1403
1404         /* Add the exponent bias (less one to account for the explicit "1.0"
1405            of the mantissa that will be added to the exponent in the final
1406            result).  */
1407         movi    a9, 0x3fe
1408         add     a8, a8, a9
1409
1410         /* Check for over/underflow.  The value in a8 is one less than the
1411            final exponent, so values in the range 0..7fd are OK here.  */
1412         addmi   a9, a9, 0x400   /* 0x7fe */
1413         bgeu    a8, a9, .Ldiv_overflow
1414
1415 .Ldiv_round:
1416         /* Round.  The remainder (<< 1) is in xh/xl.  */
1417         bltu    xh, yh, .Ldiv_rounded
1418         beq     xh, yh, .Ldiv_highequal3
1419 .Ldiv_roundup:
1420         addi    a11, a11, 1
1421         beqz    a11, .Ldiv_roundcarry
1422
1423 .Ldiv_rounded:
1424         mov     xl, a11
1425         /* Add the exponent to the mantissa.  */
1426         slli    a8, a8, 20
1427         add     xh, a10, a8
1428
1429 .Ldiv_addsign:
1430         /* Add the sign bit.  */
1431         srli    a7, a7, 31
1432         slli    a7, a7, 31
1433         or      xh, xh, a7
1434         leaf_return
1435
1436 .Ldiv_highequal2:
1437         bgeu    xl, yl, 2b
1438         j       3b
1439
1440 .Ldiv_highequal3:
1441         bltu    xl, yl, .Ldiv_rounded
1442         bne     xl, yl, .Ldiv_roundup
1443
1444         /* Remainder is exactly half the divisor.  Round even.  */
1445         addi    a11, a11, 1
1446         beqz    a11, .Ldiv_roundcarry
1447         srli    a11, a11, 1
1448         slli    a11, a11, 1
1449         j       .Ldiv_rounded
1450
1451 .Ldiv_overflow:
1452         bltz    a8, .Ldiv_underflow
1453         /* Return +/- Infinity.  */
1454         addi    a8, a9, 1       /* 0x7ff */
1455         slli    xh, a8, 20
1456         movi    xl, 0
1457         j       .Ldiv_addsign
1458
1459 .Ldiv_underflow:
1460         /* Create a subnormal value, where the exponent field contains zero,
1461            but the effective exponent is 1.  The value of a8 is one less than
1462            the actual exponent, so just negate it to get the shift amount.  */
1463         neg     a8, a8
1464         ssr     a8
1465         bgeui   a8, 32, .Ldiv_bigshift
1466
1467         /* Shift a10/a11 right.  Any bits that are shifted out of a11 are
1468            saved in a6 for rounding the result.  */
1469         sll     a6, a11
1470         src     a11, a10, a11
1471         srl     a10, a10
1472         j       1f
1473
1474 .Ldiv_bigshift:
1475         bgeui   a8, 64, .Ldiv_flush_to_zero
1476         sll     a9, a11         /* lost bits shifted out of a11 */
1477         src     a6, a10, a11
1478         srl     a11, a10
1479         movi    a10, 0
1480         or      xl, xl, a9
1481
1482         /* Set the exponent to zero.  */
1483 1:      movi    a8, 0
1484
1485         /* Pack any nonzero remainder (in xh/xl) into a6.  */
1486         or      xh, xh, xl
1487         beqz    xh, 1f
1488         movi    a9, 1
1489         or      a6, a6, a9
1490
1491         /* Round a10/a11 based on the bits shifted out into a6.  */
1492 1:      bgez    a6, .Ldiv_rounded
1493         addi    a11, a11, 1
1494         beqz    a11, .Ldiv_roundcarry
1495         slli    a6, a6, 1
1496         bnez    a6, .Ldiv_rounded
1497         srli    a11, a11, 1
1498         slli    a11, a11, 1
1499         j       .Ldiv_rounded
1500
1501 .Ldiv_roundcarry:
1502         /* a11 is always zero when the rounding increment overflows, so
1503            there's no need to round it to an even value.  */
1504         addi    a10, a10, 1
1505         /* Overflow to the exponent field is OK.  */
1506         j       .Ldiv_rounded
1507
1508 .Ldiv_flush_to_zero:
1509         /* Return zero with the appropriate sign bit.  */
1510         srli    xh, a7, 31
1511         slli    xh, xh, 31
1512         movi    xl, 0
1513         leaf_return
1514
1515 #endif /* L_divdf3 */
1516
1517 #ifdef L_cmpdf2
1518
1519         /* Equal and Not Equal */
1520
1521         .align  4
1522         .global __eqdf2
1523         .global __nedf2
1524         .set    __nedf2, __eqdf2
1525         .type   __eqdf2, @function
1526 __eqdf2:
1527         leaf_entry sp, 16
1528         bne     xl, yl, 2f
1529         bne     xh, yh, 4f
1530
1531         /* The values are equal but NaN != NaN.  Check the exponent.  */
1532         movi    a6, 0x7ff00000
1533         ball    xh, a6, 3f
1534
1535         /* Equal.  */
1536         movi    a2, 0
1537         leaf_return
1538
1539         /* Not equal.  */
1540 2:      movi    a2, 1
1541         leaf_return
1542
1543         /* Check if the mantissas are nonzero.  */
1544 3:      slli    a7, xh, 12
1545         or      a7, a7, xl
1546         j       5f
1547
1548         /* Check if x and y are zero with different signs.  */
1549 4:      or      a7, xh, yh
1550         slli    a7, a7, 1
1551         or      a7, a7, xl      /* xl == yl here */
1552
1553         /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1554            or x when exponent(x) = 0x7ff and x == y.  */
1555 5:      movi    a2, 0
1556         movi    a3, 1
1557         movnez  a2, a3, a7
1558         leaf_return
1559
1560
1561         /* Greater Than */
1562
1563         .align  4
1564         .global __gtdf2
1565         .type   __gtdf2, @function
1566 __gtdf2:
1567         leaf_entry sp, 16
1568         movi    a6, 0x7ff00000
1569         ball    xh, a6, 2f
1570 1:      bnall   yh, a6, .Lle_cmp
1571
1572         /* Check if y is a NaN.  */
1573         slli    a7, yh, 12
1574         or      a7, a7, yl
1575         beqz    a7, .Lle_cmp
1576         movi    a2, 0
1577         leaf_return
1578
1579         /* Check if x is a NaN.  */
1580 2:      slli    a7, xh, 12
1581         or      a7, a7, xl
1582         beqz    a7, 1b
1583         movi    a2, 0
1584         leaf_return
1585
1586
1587         /* Less Than or Equal */
1588
1589         .align  4
1590         .global __ledf2
1591         .type   __ledf2, @function
1592 __ledf2:
1593         leaf_entry sp, 16
1594         movi    a6, 0x7ff00000
1595         ball    xh, a6, 2f
1596 1:      bnall   yh, a6, .Lle_cmp
1597
1598         /* Check if y is a NaN.  */
1599         slli    a7, yh, 12
1600         or      a7, a7, yl
1601         beqz    a7, .Lle_cmp
1602         movi    a2, 1
1603         leaf_return
1604
1605         /* Check if x is a NaN.  */
1606 2:      slli    a7, xh, 12
1607         or      a7, a7, xl
1608         beqz    a7, 1b
1609         movi    a2, 1
1610         leaf_return
1611
1612 .Lle_cmp:
1613         /* Check if x and y have different signs.  */
1614         xor     a7, xh, yh
1615         bltz    a7, .Lle_diff_signs
1616
1617         /* Check if x is negative.  */
1618         bltz    xh, .Lle_xneg
1619
1620         /* Check if x <= y.  */
1621         bltu    xh, yh, 4f
1622         bne     xh, yh, 5f
1623         bltu    yl, xl, 5f
1624 4:      movi    a2, 0
1625         leaf_return
1626
1627 .Lle_xneg:
1628         /* Check if y <= x.  */
1629         bltu    yh, xh, 4b
1630         bne     yh, xh, 5f
1631         bgeu    xl, yl, 4b
1632 5:      movi    a2, 1
1633         leaf_return
1634
1635 .Lle_diff_signs:
1636         bltz    xh, 4b
1637
1638         /* Check if both x and y are zero.  */
1639         or      a7, xh, yh
1640         slli    a7, a7, 1
1641         or      a7, a7, xl
1642         or      a7, a7, yl
1643         movi    a2, 1
1644         movi    a3, 0
1645         moveqz  a2, a3, a7
1646         leaf_return
1647
1648
1649         /* Greater Than or Equal */
1650
1651         .align  4
1652         .global __gedf2
1653         .type   __gedf2, @function
1654 __gedf2:
1655         leaf_entry sp, 16
1656         movi    a6, 0x7ff00000
1657         ball    xh, a6, 2f
1658 1:      bnall   yh, a6, .Llt_cmp
1659
1660         /* Check if y is a NaN.  */
1661         slli    a7, yh, 12
1662         or      a7, a7, yl
1663         beqz    a7, .Llt_cmp
1664         movi    a2, -1
1665         leaf_return
1666
1667         /* Check if x is a NaN.  */
1668 2:      slli    a7, xh, 12
1669         or      a7, a7, xl
1670         beqz    a7, 1b
1671         movi    a2, -1
1672         leaf_return
1673
1674
1675         /* Less Than */
1676
1677         .align  4
1678         .global __ltdf2
1679         .type   __ltdf2, @function
1680 __ltdf2:
1681         leaf_entry sp, 16
1682         movi    a6, 0x7ff00000
1683         ball    xh, a6, 2f
1684 1:      bnall   yh, a6, .Llt_cmp
1685
1686         /* Check if y is a NaN.  */
1687         slli    a7, yh, 12
1688         or      a7, a7, yl
1689         beqz    a7, .Llt_cmp
1690         movi    a2, 0
1691         leaf_return
1692
1693         /* Check if x is a NaN.  */
1694 2:      slli    a7, xh, 12
1695         or      a7, a7, xl
1696         beqz    a7, 1b
1697         movi    a2, 0
1698         leaf_return
1699
1700 .Llt_cmp:
1701         /* Check if x and y have different signs.  */
1702         xor     a7, xh, yh
1703         bltz    a7, .Llt_diff_signs
1704
1705         /* Check if x is negative.  */
1706         bltz    xh, .Llt_xneg
1707
1708         /* Check if x < y.  */
1709         bltu    xh, yh, 4f
1710         bne     xh, yh, 5f
1711         bgeu    xl, yl, 5f
1712 4:      movi    a2, -1
1713         leaf_return
1714
1715 .Llt_xneg:
1716         /* Check if y < x.  */
1717         bltu    yh, xh, 4b
1718         bne     yh, xh, 5f
1719         bltu    yl, xl, 4b
1720 5:      movi    a2, 0
1721         leaf_return
1722
1723 .Llt_diff_signs:
1724         bgez    xh, 5b
1725
1726         /* Check if both x and y are nonzero.  */
1727         or      a7, xh, yh
1728         slli    a7, a7, 1
1729         or      a7, a7, xl
1730         or      a7, a7, yl
1731         movi    a2, 0
1732         movi    a3, -1
1733         movnez  a2, a3, a7
1734         leaf_return
1735
1736
1737         /* Unordered */
1738
1739         .align  4
1740         .global __unorddf2
1741         .type   __unorddf2, @function
1742 __unorddf2:
1743         leaf_entry sp, 16
1744         movi    a6, 0x7ff00000
1745         ball    xh, a6, 3f
1746 1:      ball    yh, a6, 4f
1747 2:      movi    a2, 0
1748         leaf_return
1749
1750 3:      slli    a7, xh, 12
1751         or      a7, a7, xl
1752         beqz    a7, 1b
1753         movi    a2, 1
1754         leaf_return
1755
1756 4:      slli    a7, yh, 12
1757         or      a7, a7, yl
1758         beqz    a7, 2b
1759         movi    a2, 1
1760         leaf_return
1761
1762 #endif /* L_cmpdf2 */
1763
1764 #ifdef L_fixdfsi
1765
1766         .align  4
1767         .global __fixdfsi
1768         .type   __fixdfsi, @function
1769 __fixdfsi:
1770         leaf_entry sp, 16
1771
1772         /* Check for NaN and Infinity.  */
1773         movi    a6, 0x7ff00000
1774         ball    xh, a6, .Lfixdfsi_nan_or_inf
1775
1776         /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
1777         extui   a4, xh, 20, 11
1778         extui   a5, a6, 19, 10  /* 0x3fe */
1779         sub     a4, a4, a5
1780         bgei    a4, 32, .Lfixdfsi_maxint
1781         blti    a4, 1, .Lfixdfsi_zero
1782
1783         /* Add explicit "1.0" and shift << 11.  */
1784         or      a7, xh, a6
1785         ssai    (32 - 11)
1786         src     a5, a7, xl
1787
1788         /* Shift back to the right, based on the exponent.  */
1789         ssl     a4              /* shift by 32 - a4 */
1790         srl     a5, a5
1791
1792         /* Negate the result if sign != 0.  */
1793         neg     a2, a5
1794         movgez  a2, a5, a7
1795         leaf_return
1796
1797 .Lfixdfsi_nan_or_inf:
1798         /* Handle Infinity and NaN.  */
1799         slli    a4, xh, 12
1800         or      a4, a4, xl
1801         beqz    a4, .Lfixdfsi_maxint
1802
1803         /* Translate NaN to +maxint.  */
1804         movi    xh, 0
1805
1806 .Lfixdfsi_maxint:
1807         slli    a4, a6, 11      /* 0x80000000 */
1808         addi    a5, a4, -1      /* 0x7fffffff */
1809         movgez  a4, a5, xh
1810         mov     a2, a4
1811         leaf_return
1812
1813 .Lfixdfsi_zero:
1814         movi    a2, 0
1815         leaf_return
1816
1817 #endif /* L_fixdfsi */
1818
1819 #ifdef L_fixdfdi
1820
1821         .align  4
1822         .global __fixdfdi
1823         .type   __fixdfdi, @function
1824 __fixdfdi:
1825         leaf_entry sp, 16
1826
1827         /* Check for NaN and Infinity.  */
1828         movi    a6, 0x7ff00000
1829         ball    xh, a6, .Lfixdfdi_nan_or_inf
1830
1831         /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
1832         extui   a4, xh, 20, 11
1833         extui   a5, a6, 19, 10  /* 0x3fe */
1834         sub     a4, a4, a5
1835         bgei    a4, 64, .Lfixdfdi_maxint
1836         blti    a4, 1, .Lfixdfdi_zero
1837
1838         /* Add explicit "1.0" and shift << 11.  */
1839         or      a7, xh, a6
1840         ssai    (32 - 11)
1841         src     xh, a7, xl
1842         sll     xl, xl
1843
1844         /* Shift back to the right, based on the exponent.  */
1845         ssl     a4              /* shift by 64 - a4 */
1846         bgei    a4, 32, .Lfixdfdi_smallshift
1847         srl     xl, xh
1848         movi    xh, 0
1849
1850 .Lfixdfdi_shifted:
1851         /* Negate the result if sign != 0.  */
1852         bgez    a7, 1f
1853         neg     xl, xl
1854         neg     xh, xh
1855         beqz    xl, 1f
1856         addi    xh, xh, -1
1857 1:      leaf_return
1858
1859 .Lfixdfdi_smallshift:
1860         src     xl, xh, xl
1861         srl     xh, xh
1862         j       .Lfixdfdi_shifted
1863
1864 .Lfixdfdi_nan_or_inf:
1865         /* Handle Infinity and NaN.  */
1866         slli    a4, xh, 12
1867         or      a4, a4, xl
1868         beqz    a4, .Lfixdfdi_maxint
1869
1870         /* Translate NaN to +maxint.  */
1871         movi    xh, 0
1872
1873 .Lfixdfdi_maxint:
1874         slli    a7, a6, 11      /* 0x80000000 */
1875         bgez    xh, 1f
1876         mov     xh, a7
1877         movi    xl, 0
1878         leaf_return
1879
1880 1:      addi    xh, a7, -1      /* 0x7fffffff */
1881         movi    xl, -1
1882         leaf_return
1883
1884 .Lfixdfdi_zero:
1885         movi    xh, 0
1886         movi    xl, 0
1887         leaf_return
1888
1889 #endif /* L_fixdfdi */
1890
1891 #ifdef L_fixunsdfsi
1892
1893         .align  4
1894         .global __fixunsdfsi
1895         .type   __fixunsdfsi, @function
1896 __fixunsdfsi:
1897         leaf_entry sp, 16
1898
1899         /* Check for NaN and Infinity.  */
1900         movi    a6, 0x7ff00000
1901         ball    xh, a6, .Lfixunsdfsi_nan_or_inf
1902
1903         /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
1904         extui   a4, xh, 20, 11
1905         extui   a5, a6, 20, 10  /* 0x3ff */
1906         sub     a4, a4, a5
1907         bgei    a4, 32, .Lfixunsdfsi_maxint
1908         bltz    a4, .Lfixunsdfsi_zero
1909
1910         /* Add explicit "1.0" and shift << 11.  */
1911         or      a7, xh, a6
1912         ssai    (32 - 11)
1913         src     a5, a7, xl
1914
1915         /* Shift back to the right, based on the exponent.  */
1916         addi    a4, a4, 1
1917         beqi    a4, 32, .Lfixunsdfsi_bigexp
1918         ssl     a4              /* shift by 32 - a4 */
1919         srl     a5, a5
1920
1921         /* Negate the result if sign != 0.  */
1922         neg     a2, a5
1923         movgez  a2, a5, a7
1924         leaf_return
1925
1926 .Lfixunsdfsi_nan_or_inf:
1927         /* Handle Infinity and NaN.  */
1928         slli    a4, xh, 12
1929         or      a4, a4, xl
1930         beqz    a4, .Lfixunsdfsi_maxint
1931
1932         /* Translate NaN to 0xffffffff.  */
1933         movi    a2, -1
1934         leaf_return
1935
1936 .Lfixunsdfsi_maxint:
1937         slli    a4, a6, 11      /* 0x80000000 */
1938         movi    a5, -1          /* 0xffffffff */
1939         movgez  a4, a5, xh
1940         mov     a2, a4
1941         leaf_return
1942
1943 .Lfixunsdfsi_zero:
1944         movi    a2, 0
1945         leaf_return
1946
1947 .Lfixunsdfsi_bigexp:
1948         /* Handle unsigned maximum exponent case.  */
1949         bltz    xh, 1f
1950         mov     a2, a5          /* no shift needed */
1951         leaf_return
1952
1953         /* Return 0x80000000 if negative.  */
1954 1:      slli    a2, a6, 11
1955         leaf_return
1956
1957 #endif /* L_fixunsdfsi */
1958
1959 #ifdef L_fixunsdfdi
1960
1961         .align  4
1962         .global __fixunsdfdi
1963         .type   __fixunsdfdi, @function
1964 __fixunsdfdi:
1965         leaf_entry sp, 16
1966
1967         /* Check for NaN and Infinity.  */
1968         movi    a6, 0x7ff00000
1969         ball    xh, a6, .Lfixunsdfdi_nan_or_inf
1970
1971         /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
1972         extui   a4, xh, 20, 11
1973         extui   a5, a6, 20, 10  /* 0x3ff */
1974         sub     a4, a4, a5
1975         bgei    a4, 64, .Lfixunsdfdi_maxint
1976         bltz    a4, .Lfixunsdfdi_zero
1977
1978         /* Add explicit "1.0" and shift << 11.  */
1979         or      a7, xh, a6
1980         ssai    (32 - 11)
1981         src     xh, a7, xl
1982         sll     xl, xl
1983
1984         /* Shift back to the right, based on the exponent.  */
1985         addi    a4, a4, 1
1986         beqi    a4, 64, .Lfixunsdfdi_bigexp
1987         ssl     a4              /* shift by 64 - a4 */
1988         bgei    a4, 32, .Lfixunsdfdi_smallshift
1989         srl     xl, xh
1990         movi    xh, 0
1991
1992 .Lfixunsdfdi_shifted:
1993         /* Negate the result if sign != 0.  */
1994         bgez    a7, 1f
1995         neg     xl, xl
1996         neg     xh, xh
1997         beqz    xl, 1f
1998         addi    xh, xh, -1
1999 1:      leaf_return
2000
2001 .Lfixunsdfdi_smallshift:
2002         src     xl, xh, xl
2003         srl     xh, xh
2004         j       .Lfixunsdfdi_shifted
2005
2006 .Lfixunsdfdi_nan_or_inf:
2007         /* Handle Infinity and NaN.  */
2008         slli    a4, xh, 12
2009         or      a4, a4, xl
2010         beqz    a4, .Lfixunsdfdi_maxint
2011
2012         /* Translate NaN to 0xffffffff.... */
2013 1:      movi    xh, -1
2014         movi    xl, -1
2015         leaf_return
2016
2017 .Lfixunsdfdi_maxint:
2018         bgez    xh, 1b
2019 2:      slli    xh, a6, 11      /* 0x80000000 */
2020         movi    xl, 0
2021         leaf_return
2022
2023 .Lfixunsdfdi_zero:
2024         movi    xh, 0
2025         movi    xl, 0
2026         leaf_return
2027
2028 .Lfixunsdfdi_bigexp:
2029         /* Handle unsigned maximum exponent case.  */
2030         bltz    a7, 2b
2031         leaf_return             /* no shift needed */
2032
2033 #endif /* L_fixunsdfdi */
2034
2035 #ifdef L_floatsidf
2036
2037         .align  4
2038         .global __floatunsidf
2039         .type   __floatunsidf, @function
2040 __floatunsidf:
2041         leaf_entry sp, 16
2042         beqz    a2, .Lfloatsidf_return_zero
2043
2044         /* Set the sign to zero and jump to the floatsidf code.  */
2045         movi    a7, 0
2046         j       .Lfloatsidf_normalize
2047
2048         .align  4
2049         .global __floatsidf
2050         .type   __floatsidf, @function
2051 __floatsidf:
2052         leaf_entry sp, 16
2053
2054         /* Check for zero.  */
2055         beqz    a2, .Lfloatsidf_return_zero
2056
2057         /* Save the sign.  */
2058         extui   a7, a2, 31, 1
2059
2060         /* Get the absolute value.  */
2061 #if XCHAL_HAVE_ABS
2062         abs     a2, a2
2063 #else
2064         neg     a4, a2
2065         movltz  a2, a4, a2
2066 #endif
2067
2068 .Lfloatsidf_normalize:
2069         /* Normalize with the first 1 bit in the msb.  */
2070         do_nsau a4, a2, a5, a6
2071         ssl     a4
2072         sll     a5, a2
2073
2074         /* Shift the mantissa into position.  */
2075         srli    xh, a5, 11
2076         slli    xl, a5, (32 - 11)
2077
2078         /* Set the exponent.  */
2079         movi    a5, 0x41d       /* 0x3fe + 31 */
2080         sub     a5, a5, a4
2081         slli    a5, a5, 20
2082         add     xh, xh, a5
2083
2084         /* Add the sign and return. */
2085         slli    a7, a7, 31
2086         or      xh, xh, a7
2087         leaf_return
2088
2089 .Lfloatsidf_return_zero:
2090         movi    a3, 0
2091         leaf_return
2092
2093 #endif /* L_floatsidf */
2094
2095 #ifdef L_floatdidf
2096
2097         .align  4
2098         .global __floatundidf
2099         .type   __floatundidf, @function
2100 __floatundidf:
2101         leaf_entry sp, 16
2102
2103         /* Check for zero.  */
2104         or      a4, xh, xl
2105         beqz    a4, 2f
2106
2107         /* Set the sign to zero and jump to the floatdidf code.  */
2108         movi    a7, 0
2109         j       .Lfloatdidf_normalize
2110
2111         .align  4
2112         .global __floatdidf
2113         .type   __floatdidf, @function
2114 __floatdidf:
2115         leaf_entry sp, 16
2116
2117         /* Check for zero.  */
2118         or      a4, xh, xl
2119         beqz    a4, 2f
2120
2121         /* Save the sign.  */
2122         extui   a7, xh, 31, 1
2123
2124         /* Get the absolute value.  */
2125         bgez    xh, .Lfloatdidf_normalize
2126         neg     xl, xl
2127         neg     xh, xh
2128         beqz    xl, .Lfloatdidf_normalize
2129         addi    xh, xh, -1
2130
2131 .Lfloatdidf_normalize:
2132         /* Normalize with the first 1 bit in the msb of xh.  */
2133         beqz    xh, .Lfloatdidf_bigshift
2134         do_nsau a4, xh, a5, a6
2135         ssl     a4
2136         src     xh, xh, xl
2137         sll     xl, xl
2138
2139 .Lfloatdidf_shifted:
2140         /* Shift the mantissa into position, with rounding bits in a6.  */
2141         ssai    11
2142         sll     a6, xl
2143         src     xl, xh, xl
2144         srl     xh, xh
2145
2146         /* Set the exponent.  */
2147         movi    a5, 0x43d       /* 0x3fe + 63 */
2148         sub     a5, a5, a4
2149         slli    a5, a5, 20
2150         add     xh, xh, a5
2151
2152         /* Add the sign.  */
2153         slli    a7, a7, 31
2154         or      xh, xh, a7
2155
2156         /* Round up if the leftover fraction is >= 1/2.  */
2157         bgez    a6, 2f
2158         addi    xl, xl, 1
2159         beqz    xl, .Lfloatdidf_roundcarry
2160
2161         /* Check if the leftover fraction is exactly 1/2.  */
2162         slli    a6, a6, 1
2163         beqz    a6, .Lfloatdidf_exactlyhalf
2164 2:      leaf_return
2165
2166 .Lfloatdidf_bigshift:
2167         /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
2168         do_nsau a4, xl, a5, a6
2169         ssl     a4
2170         sll     xh, xl
2171         movi    xl, 0
2172         addi    a4, a4, 32
2173         j       .Lfloatdidf_shifted
2174
2175 .Lfloatdidf_exactlyhalf:
2176         /* Round down to the nearest even value.  */
2177         srli    xl, xl, 1
2178         slli    xl, xl, 1
2179         leaf_return
2180
2181 .Lfloatdidf_roundcarry:
2182         /* xl is always zero when the rounding increment overflows, so
2183            there's no need to round it to an even value.  */
2184         addi    xh, xh, 1
2185         /* Overflow to the exponent is OK.  */
2186         leaf_return
2187
2188 #endif /* L_floatdidf */
2189
2190 #ifdef L_truncdfsf2
2191
2192         .align  4
2193         .global __truncdfsf2
2194         .type   __truncdfsf2, @function
2195 __truncdfsf2:
2196         leaf_entry sp, 16
2197
2198         /* Adjust the exponent bias.  */
2199         movi    a4, (0x3ff - 0x7f) << 20
2200         sub     a5, xh, a4
2201
2202         /* Check for underflow.  */
2203         xor     a6, xh, a5
2204         bltz    a6, .Ltrunc_underflow
2205         extui   a6, a5, 20, 11
2206         beqz    a6, .Ltrunc_underflow
2207
2208         /* Check for overflow.  */
2209         movi    a4, 255
2210         bge     a6, a4, .Ltrunc_overflow
2211
2212         /* Shift a5/xl << 3 into a5/a4.  */
2213         ssai    (32 - 3)
2214         src     a5, a5, xl
2215         sll     a4, xl
2216
2217 .Ltrunc_addsign:
2218         /* Add the sign bit.  */
2219         extui   a6, xh, 31, 1
2220         slli    a6, a6, 31
2221         or      a2, a6, a5
2222
2223         /* Round up if the leftover fraction is >= 1/2.  */
2224         bgez    a4, 1f
2225         addi    a2, a2, 1
2226         /* Overflow to the exponent is OK.  The answer will be correct.  */
2227
2228         /* Check if the leftover fraction is exactly 1/2.  */
2229         slli    a4, a4, 1
2230         beqz    a4, .Ltrunc_exactlyhalf
2231 1:      leaf_return
2232
2233 .Ltrunc_exactlyhalf:
2234         /* Round down to the nearest even value.  */
2235         srli    a2, a2, 1
2236         slli    a2, a2, 1
2237         leaf_return
2238
2239 .Ltrunc_overflow:
2240         /* Check if exponent == 0x7ff.  */
2241         movi    a4, 0x7ff00000
2242         bnall   xh, a4, 1f
2243
2244         /* Check if mantissa is nonzero.  */
2245         slli    a5, xh, 12
2246         or      a5, a5, xl
2247         beqz    a5, 1f
2248
2249         /* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
2250         srli    a4, a4, 1
2251
2252 1:      slli    a4, a4, 4       /* 0xff000000 or 0xff800000 */
2253         /* Add the sign bit.  */
2254         extui   a6, xh, 31, 1
2255         ssai    1
2256         src     a2, a6, a4
2257         leaf_return
2258
2259 .Ltrunc_underflow:
2260         /* Find shift count for a subnormal.  Flush to zero if >= 32.  */
2261         extui   a6, xh, 20, 11
2262         movi    a5, 0x3ff - 0x7f
2263         sub     a6, a5, a6
2264         addi    a6, a6, 1
2265         bgeui   a6, 32, 1f
2266
2267         /* Replace the exponent with an explicit "1.0".  */
2268         slli    a5, a5, 13      /* 0x700000 */
2269         or      a5, a5, xh
2270         slli    a5, a5, 11
2271         srli    a5, a5, 11
2272
2273         /* Shift the mantissa left by 3 bits (into a5/a4).  */
2274         ssai    (32 - 3)
2275         src     a5, a5, xl
2276         sll     a4, xl
2277
2278         /* Shift right by a6.  */
2279         ssr     a6
2280         sll     a7, a4
2281         src     a4, a5, a4
2282         srl     a5, a5
2283         beqz    a7, .Ltrunc_addsign
2284         or      a4, a4, a6      /* any positive, nonzero value will work */
2285         j       .Ltrunc_addsign
2286
2287         /* Return +/- zero.  */
2288 1:      extui   a2, xh, 31, 1
2289         slli    a2, a2, 31
2290         leaf_return
2291
2292 #endif /* L_truncdfsf2 */
2293
2294 #ifdef L_extendsfdf2
2295
2296         .align  4
2297         .global __extendsfdf2
2298         .type   __extendsfdf2, @function
2299 __extendsfdf2:
2300         leaf_entry sp, 16
2301
2302         /* Save the sign bit and then shift it off.  */
2303         extui   a5, a2, 31, 1
2304         slli    a5, a5, 31
2305         slli    a4, a2, 1
2306
2307         /* Extract and check the exponent.  */
2308         extui   a6, a2, 23, 8
2309         beqz    a6, .Lextend_expzero
2310         addi    a6, a6, 1
2311         beqi    a6, 256, .Lextend_nan_or_inf
2312
2313         /* Shift >> 3 into a4/xl.  */
2314         srli    a4, a4, 4
2315         slli    xl, a2, (32 - 3)
2316
2317         /* Adjust the exponent bias.  */
2318         movi    a6, (0x3ff - 0x7f) << 20
2319         add     a4, a4, a6
2320
2321         /* Add the sign bit.  */
2322         or      xh, a4, a5
2323         leaf_return
2324
2325 .Lextend_nan_or_inf:
2326         movi    a4, 0x7ff00000
2327
2328         /* Check for NaN.  */
2329         slli    a7, a2, 9
2330         beqz    a7, 1f
2331
2332         slli    a6, a6, 11      /* 0x80000 */
2333         or      a4, a4, a6
2334
2335         /* Add the sign and return.  */
2336 1:      or      xh, a4, a5
2337         movi    xl, 0
2338         leaf_return
2339
2340 .Lextend_expzero:
2341         beqz    a4, 1b
2342
2343         /* Normalize it to have 8 zero bits before the first 1 bit.  */
2344         do_nsau a7, a4, a2, a3
2345         addi    a7, a7, -8
2346         ssl     a7
2347         sll     a4, a4
2348
2349         /* Shift >> 3 into a4/xl.  */
2350         slli    xl, a4, (32 - 3)
2351         srli    a4, a4, 3
2352
2353         /* Set the exponent.  */
2354         movi    a6, 0x3fe - 0x7f
2355         sub     a6, a6, a7
2356         slli    a6, a6, 20
2357         add     a4, a4, a6
2358
2359         /* Add the sign and return.  */
2360         or      xh, a4, a5
2361         leaf_return
2362
2363 #endif /* L_extendsfdf2 */
2364
2365