gcc/config/xtensa/ieee754-df.S

   1 /* IEEE-754 double-precision functions for Xtensa
   2    Copyright (C) 2006, 2007 Free Software Foundation, Inc.
   3    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    In addition to the permissions in the GNU General Public License,
  13    the Free Software Foundation gives you unlimited permission to link
  14    the compiled version of this file into combinations with other
  15    programs, and to distribute those combinations without any
  16    restriction coming from the use of this file.  (The General Public
  17    License restrictions do apply in other respects; for example, they
  18    cover modification of the file, and distribution when not linked
  19    into a combine executable.)
  20
  21    GCC is distributed in the hope that it will be useful, but WITHOUT
  22    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  23    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  24    License for more details.
  25
  26    You should have received a copy of the GNU General Public License
  27    along with GCC; see the file COPYING.  If not, write to the Free
  28    Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  29    02110-1301, USA.  */
  30
  31 #ifdef __XTENSA_EB__
  32 #define xh a2
  33 #define xl a3
  34 #define yh a4
  35 #define yl a5
  36 #else
  37 #define xh a3
  38 #define xl a2
  39 #define yh a5
  40 #define yl a4
  41 #endif
  42
  43 /*  Warning!  The branch displacements for some Xtensa branch instructions
  44     are quite small, and this code has been carefully laid out to keep
  45     branch targets in range.  If you change anything, be sure to check that
  46     the assembler is not relaxing anything to branch over a jump.  */
  47
  48 #ifdef L_negdf2
  49
  50         .align  4
  51         .global __negdf2
  52         .type   __negdf2, @function
  53 __negdf2:                       /* Negate the double held in xh/xl by flipping its sign bit.  */
  54         leaf_entry sp, 16
  55         movi    a4, 0x80000000  /* a4 = mask with only bit 31 set */
  56         xor     xh, xh, a4      /* toggle the sign bit of the high word; xl is not touched */
  57         leaf_return
  58
  59 #endif /* L_negdf2 */
  60
  61 #ifdef L_addsubdf3
  62
  63         /* Addition: out-of-line special cases for __adddf3 (a6 == 0x7ff00000 on entry).  */
  64 __adddf3_aux:
  65
  66         /* Handle NaNs and Infinities.  (This code is placed before the
  67            start of the function just to keep it in range of the limited
  68            branch displacements.)  */
  69
  70 .Ladd_xnan_or_inf:
  71         /* If y is neither Infinity nor NaN, return x.  */
  72         bnall   yh, a6, 1f
  73         /* If x is a NaN, return it.  Otherwise, return y.  */
  74         slli    a7, xh, 12      /* discard sign/exponent, keep the high mantissa bits */
  75         or      a7, a7, xl      /* a7 == 0 only when the whole mantissa of x is zero */
  76         beqz    a7, .Ladd_ynan_or_inf
  77 1:      leaf_return
  78
  79 .Ladd_ynan_or_inf:
  80         /* Return y.  */
  81         mov     xh, yh
  82         mov     xl, yl
  83         leaf_return
  84
  85 .Ladd_opposite_signs:
  86         /* Operand signs differ.  Do a subtraction.  */
  87         slli    a7, a6, 11      /* 0x7ff00000 << 11 leaves just the sign bit, 0x80000000 */
  88         xor     yh, yh, a7      /* flip the sign of y before joining the subtract path */
  89         j       .Lsub_same_sign
  90
  91         .align  4
  92         .global __adddf3
  93         .type   __adddf3, @function
  94 __adddf3:                       /* Double-precision add of x (xh/xl) and y (yh/yl); the result is left in xh/xl.  */
  95         leaf_entry sp, 16
  96         movi    a6, 0x7ff00000  /* a6 = mask of the exponent field in the high word */
  97
  98         /* Check if the two operands have the same sign.  */
  99         xor     a7, xh, yh      /* bit 31 of a7 is set exactly when the signs differ */
 100         bltz    a7, .Ladd_opposite_signs
 101
 102 .Ladd_same_sign:                /* also reached from .Lsub_opposite_signs after y is negated */
 103         /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
 104         ball    xh, a6, .Ladd_xnan_or_inf
 105         ball    yh, a6, .Ladd_ynan_or_inf
 106
 107         /* Compare the exponents.  The smaller operand will be shifted
 108            right by the exponent difference and added to the larger
 109            one.  */
 110         extui   a7, xh, 20, 12  /* a7 = top 12 bits of xh (sign + exponent) */
 111         extui   a8, yh, 20, 12  /* a8 = top 12 bits of yh; signs are equal here, so the order is the exponent order */
 112         bltu    a7, a8, .Ladd_shiftx
 113
 114 .Ladd_shifty:                   /* x has the larger (or equal) exponent, so y is the operand that gets shifted */
 115         /* Check if the smaller (or equal) exponent is zero.  */
 116         bnone   yh, a6, .Ladd_yexpzero
 117
 118         /* Replace yh sign/exponent with 0x001.  */
 119         or      yh, yh, a6      /* set every exponent bit ... */
 120         slli    yh, yh, 11      /* ... then clear the top 11 bits ... */
 121         srli    yh, yh, 11      /* ... which leaves bit 20 set as the explicit "1.0" */
 122
 123 .Ladd_yexpdiff:
 124         /* Compute the exponent difference.  Optimize for difference < 32.  */
 125         sub     a10, a7, a8
 126         bgeui   a10, 32, .Ladd_bigshifty
 127
 128         /* Shift yh/yl right by the exponent difference.  Any bits that are
 129            shifted out of yl are saved in a9 for rounding the result.  */
 130         ssr     a10
 131         movi    a9, 0
 132         src     a9, yl, a9
 133         src     yl, yh, yl
 134         srl     yh, yh
 135
 136 .Ladd_addy:
 137         /* Do the 64-bit addition.  */
 138         add     xl, xl, yl
 139         add     xh, xh, yh
 140         bgeu    xl, yl, 1f      /* a low-word sum below the addend means the add wrapped */
 141         addi    xh, xh, 1       /* propagate that carry into the high word */
 142 1:
 143         /* Check if the add overflowed into the exponent.  */
 144         extui   a10, xh, 20, 12
 145         beq     a10, a7, .Ladd_round
 146         mov     a8, a7          /* .Ladd_carry reads the original exponent from a8 */
 147         j       .Ladd_carry
 148
 149 .Ladd_yexpzero:
 150         /* y is a subnormal value.  Replace its sign/exponent with zero,
 151            i.e., no implicit "1.0", and increment the apparent exponent
 152            because subnormals behave as if they had the minimum (nonzero)
 153            exponent.  Test for the case when both exponents are zero.  */
 154         slli    yh, yh, 12      /* clear the top 12 bits (sign + exponent) of yh */
 155         srli    yh, yh, 12
 156         bnone   xh, a6, .Ladd_bothexpzero
 157         addi    a8, a8, 1
 158         j       .Ladd_yexpdiff
 159
 160 .Ladd_bothexpzero:
 161         /* Both exponents are zero.  Handle this as a special case.  There
 162            is no need to shift or round, and the normal code for handling
 163            a carry into the exponent field will not work because it
 164            assumes there is an implicit "1.0" that needs to be added.  */
 165         add     xl, xl, yl
 166         add     xh, xh, yh
 167         bgeu    xl, yl, 1f      /* no carry out of the low word */
 168         addi    xh, xh, 1
 169 1:      leaf_return
 170
 171 .Ladd_bigshifty:
 172         /* Exponent difference >= 64 -- just return the bigger value.  */
 173         bgeui   a10, 64, 1b     /* xh/xl still hold the unmodified x on this path */
 174
 175         /* Shift yh/yl right by the exponent difference.  Any bits that are
 176            shifted out are saved in a9 for rounding the result.  */
 177         ssr     a10
 178         sll     a11, yl         /* lost bits shifted out of yl */
 179         src     a9, yh, yl
 180         srl     yl, yh
 181         movi    yh, 0
 182         beqz    a11, .Ladd_addy
 183         or      a9, a9, a10     /* any positive, nonzero value will work */
 184         j       .Ladd_addy
 185
 186 .Ladd_xexpzero:
 187         /* Same as "yexpzero" except skip handling the case when both
 188            exponents are zero.  */
 189         slli    xh, xh, 12      /* clear the top 12 bits (sign + exponent) of xh */
 190         srli    xh, xh, 12
 191         addi    a7, a7, 1
 192         j       .Ladd_xexpdiff
 193
 194 .Ladd_shiftx:
 195         /* Same thing as the "shifty" code, but with x and y swapped.  Also,
 196            because the exponent difference is always nonzero in this version,
 197            the shift sequence can use SLL and skip loading a constant zero.  */
 198         bnone   xh, a6, .Ladd_xexpzero
 199
 200         or      xh, xh, a6      /* replace the xh sign/exponent with 0x001, as done for yh in "shifty" */
 201         slli    xh, xh, 11
 202         srli    xh, xh, 11
 203
 204 .Ladd_xexpdiff:
 205         sub     a10, a8, a7     /* a10 = exponent difference (y minus x) */
 206         bgeui   a10, 32, .Ladd_bigshiftx
 207
 208         ssr     a10
 209         sll     a9, xl          /* a9 = bits shifted out of xl, kept for rounding */
 210         src     xl, xh, xl
 211         srl     xh, xh
 212
 213 .Ladd_addx:
 214         add     xl, xl, yl
 215         add     xh, xh, yh
 216         bgeu    xl, yl, 1f      /* no carry out of the low word */
 217         addi    xh, xh, 1
 218 1:
 219         /* Check if the add overflowed into the exponent.  */
 220         extui   a10, xh, 20, 12
 221         bne     a10, a8, .Ladd_carry
 222
 223 .Ladd_round:
 224         /* Round up if the leftover fraction is >= 1/2.  */
 225         bgez    a9, 1f          /* msb of a9 clear: leftover is below 1/2, nothing to do */
 226         addi    xl, xl, 1
 227         beqz    xl, .Ladd_roundcarry
 228
 229         /* Check if the leftover fraction is exactly 1/2.  */
 230         slli    a9, a9, 1       /* drop the 1/2 bit; zero means nothing lay below it */
 231         beqz    a9, .Ladd_exactlyhalf
 232 1:      leaf_return
 233
 234 .Ladd_bigshiftx:
 235         /* Mostly the same thing as "bigshifty"....  */
 236         bgeui   a10, 64, .Ladd_returny
 237
 238         ssr     a10
 239         sll     a11, xl         /* lost bits shifted out of xl */
 240         src     a9, xh, xl
 241         srl     xl, xh
 242         movi    xh, 0
 243         beqz    a11, .Ladd_addx
 244         or      a9, a9, a10     /* record the lost bits as a nonzero leftover */
 245         j       .Ladd_addx
 246
 247 .Ladd_returny:                  /* x is too small to affect the sum: return y */
 248         mov     xh, yh
 249         mov     xl, yl
 250         leaf_return
 251
 252 .Ladd_carry:
 253         /* The addition has overflowed into the exponent field, so the
 254            value needs to be renormalized.  The mantissa of the result
 255            can be recovered by subtracting the original exponent and
 256            adding 0x100000 (which is the explicit "1.0" for the
 257            mantissa of the non-shifted operand -- the "1.0" for the
 258            shifted operand was already added).  The mantissa can then
 259            be shifted right by one bit.  The explicit "1.0" of the
 260            shifted mantissa then needs to be replaced by the exponent,
 261            incremented by one to account for the normalizing shift.
 262            It is faster to combine these operations: do the shift first
 263            and combine the additions and subtractions.  If x is the
 264            original exponent, the result is:
 265                shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
 266            or:
 267                shifted mantissa + ((x + 1) << 19)
 268            Note that the exponent is incremented here by leaving the
 269            explicit "1.0" of the mantissa in the exponent field.  */
 270
 271         /* Shift xh/xl right by one bit.  Save the lsb of xl.  */
 272         mov     a10, xl         /* bit 0 of a10 is the bit about to be shifted out */
 273         ssai    1
 274         src     xl, xh, xl
 275         srl     xh, xh
 276
 277         /* See explanation above.  The original exponent is in a8.  */
 278         addi    a8, a8, 1
 279         slli    a8, a8, 19
 280         add     xh, xh, a8
 281
 282         /* Return an Infinity if the exponent overflowed.  */
 283         ball    xh, a6, .Ladd_infinity
 284
 285         /* Same thing as the "round" code except the msb of the leftover
 286            fraction is bit 0 of a10, with the rest of the fraction in a9.  */
 287         bbci.l  a10, 0, 1f      /* shifted-out bit clear: leftover is below 1/2 */
 288         addi    xl, xl, 1
 289         beqz    xl, .Ladd_roundcarry
 290         beqz    a9, .Ladd_exactlyhalf
 291 1:      leaf_return
 292
 293 .Ladd_infinity:
 294         /* Clear the mantissa.  */
 295         movi    xl, 0
 296         srli    xh, xh, 20      /* keep only the top 12 bits of xh */
 297         slli    xh, xh, 20
 298
 299         /* The sign bit may have been lost in a carry-out.  Put it back.  */
 300         slli    a8, a8, 1       /* a8 becomes (original sign/exponent + 1) << 20 */
 301         or      xh, xh, a8
 302         leaf_return
 303
 304 .Ladd_exactlyhalf:
 305         /* Round down to the nearest even value.  */
 306         srli    xl, xl, 1       /* clear bit 0 of xl */
 307         slli    xl, xl, 1
 308         leaf_return
 309
 310 .Ladd_roundcarry:
 311         /* xl is always zero when the rounding increment overflows, so
 312            there's no need to round it to an even value.  */
 313         addi    xh, xh, 1
 314         /* Overflow to the exponent is OK.  */
 315         leaf_return
 316
 317
 318         /* Subtraction: out-of-line special cases for __subdf3 (a6 == 0x7ff00000 on entry).  */
 319 __subdf3_aux:
 320
 321         /* Handle NaNs and Infinities.  (This code is placed before the
 322            start of the function just to keep it in range of the limited
 323            branch displacements.)  */
 324
 325 .Lsub_xnan_or_inf:
 326         /* If y is neither Infinity nor NaN, return x.  */
 327         bnall   yh, a6, 1f
 328         /* Both x and y are either NaN or Inf, so the result is NaN.  */
 329         movi    a4, 0x80000     /* make it a quiet NaN */
 330         or      xh, xh, a4      /* a4 aliases yh or yl (see the defines at the top), so y is overwritten here */
 331 1:      leaf_return
 332
 333 .Lsub_ynan_or_inf:
 334         /* Negate y and return it.  */
 335         slli    a7, a6, 11      /* 0x7ff00000 << 11 leaves just the sign bit, 0x80000000 */
 336         xor     xh, yh, a7
 337         mov     xl, yl
 338         leaf_return
 339
 340 .Lsub_opposite_signs:
 341         /* Operand signs differ.  Do an addition.  */
 342         slli    a7, a6, 11      /* a7 = 0x80000000 */
 343         xor     yh, yh, a7      /* flip the sign of y before joining the add path */
 344         j       .Ladd_same_sign
 345
 346         .align  4
 347         .global __subdf3
 348         .type   __subdf3, @function
 349 __subdf3:                       /* Double-precision subtract: x (xh/xl) minus y (yh/yl); the result is left in xh/xl.  */
 350         leaf_entry sp, 16
 351         movi    a6, 0x7ff00000  /* a6 = mask of the exponent field in the high word */
 352
 353         /* Check if the two operands have the same sign.  */
 354         xor     a7, xh, yh      /* bit 31 of a7 is set exactly when the signs differ */
 355         bltz    a7, .Lsub_opposite_signs
 356
 357 .Lsub_same_sign:                /* also reached from .Ladd_opposite_signs after y is negated */
 358         /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
 359         ball    xh, a6, .Lsub_xnan_or_inf
 360         ball    yh, a6, .Lsub_ynan_or_inf
 361
 362         /* Compare the operands.  In contrast to addition, the entire
 363            value matters here.  */
 364         extui   a7, xh, 20, 11  /* a7 = 11-bit exponent of x (sign excluded) */
 365         extui   a8, yh, 20, 11  /* a8 = 11-bit exponent of y */
 366         bltu    xh, yh, .Lsub_xsmaller
 367         beq     xh, yh, .Lsub_compare_low
 368
 369 .Lsub_ysmaller:                 /* y is the smaller operand here, so it is the one aligned and subtracted */
 370         /* Check if the smaller (or equal) exponent is zero.  */
 371         bnone   yh, a6, .Lsub_yexpzero
 372
 373         /* Replace yh sign/exponent with 0x001.  */
 374         or      yh, yh, a6      /* set every exponent bit ... */
 375         slli    yh, yh, 11      /* ... then clear the top 11 bits ... */
 376         srli    yh, yh, 11      /* ... which leaves bit 20 set as the explicit "1.0" */
 377
 378 .Lsub_yexpdiff:
 379         /* Compute the exponent difference.  Optimize for difference < 32.  */
 380         sub     a10, a7, a8
 381         bgeui   a10, 32, .Lsub_bigshifty
 382
 383         /* Shift yh/yl right by the exponent difference.  Any bits that are
 384            shifted out of yl are saved in a9 for rounding the result.  */
 385         ssr     a10
 386         movi    a9, 0
 387         src     a9, yl, a9
 388         src     yl, yh, yl
 389         srl     yh, yh
 390
 391 .Lsub_suby:
 392         /* Do the 64-bit subtraction.  */
 393         sub     xh, xh, yh
 394         bgeu    xl, yl, 1f      /* xl >= yl: the low word will not borrow */
 395         addi    xh, xh, -1      /* take the borrow from the high word */
 396 1:      sub     xl, xl, yl
 397
 398         /* Subtract the leftover bits in a9 from zero and propagate any
 399            borrow from xh/xl.  */
 400         neg     a9, a9
 401         beqz    a9, 1f
 402         addi    a5, xh, -1      /* a5 is scratch here; it aliases yh or yl, which are no longer needed */
 403         moveqz  xh, a5, xl      /* borrow from xh only when xl is zero */
 404         addi    xl, xl, -1
 405 1:
 406         /* Check if the subtract underflowed into the exponent.  */
 407         extui   a10, xh, 20, 11
 408         beq     a10, a7, .Lsub_round
 409         j       .Lsub_borrow
 410
 411 .Lsub_compare_low:
 412         /* The high words are equal.  Compare the low words.  */
 413         bltu    xl, yl, .Lsub_xsmaller
 414         bltu    yl, xl, .Lsub_ysmaller
 415         /* The operands are equal.  Return 0.0.  */
 416         movi    xh, 0
 417         movi    xl, 0
 418 1:      leaf_return
 419
 420 .Lsub_yexpzero:
 421         /* y is a subnormal value.  Replace its sign/exponent with zero,
 422            i.e., no implicit "1.0".  Unless x is also a subnormal, increment
 423            y's apparent exponent because subnormals behave as if they had
 424            the minimum (nonzero) exponent.  */
 425         slli    yh, yh, 12      /* clear the top 12 bits (sign + exponent) of yh */
 426         srli    yh, yh, 12
 427         bnone   xh, a6, .Lsub_yexpdiff
 428         addi    a8, a8, 1
 429         j       .Lsub_yexpdiff
 430
 431 .Lsub_bigshifty:
 432         /* Exponent difference >= 64 -- just return the bigger value.  */
 433         bgeui   a10, 64, 1b     /* xh/xl still hold the unmodified x on this path */
 434
 435         /* Shift yh/yl right by the exponent difference.  Any bits that are
 436            shifted out are saved in a9 for rounding the result.  */
 437         ssr     a10
 438         sll     a11, yl         /* lost bits shifted out of yl */
 439         src     a9, yh, yl
 440         srl     yl, yh
 441         movi    yh, 0
 442         beqz    a11, .Lsub_suby
 443         or      a9, a9, a10     /* any positive, nonzero value will work */
 444         j       .Lsub_suby
 445
 446 .Lsub_xsmaller:
 447         /* Same thing as the "ysmaller" code, but with x and y swapped and
 448            with y negated.  */
 449         bnone   xh, a6, .Lsub_xexpzero
 450
 451         or      xh, xh, a6      /* replace the xh sign/exponent with 0x001, as done for yh in "ysmaller" */
 452         slli    xh, xh, 11
 453         srli    xh, xh, 11
 454
 455 .Lsub_xexpdiff:
 456         sub     a10, a8, a7     /* a10 = exponent difference (y minus x) */
 457         bgeui   a10, 32, .Lsub_bigshiftx
 458
 459         ssr     a10
 460         movi    a9, 0
 461         src     a9, xl, a9      /* a9 = bits shifted out of xl, kept for rounding */
 462         src     xl, xh, xl
 463         srl     xh, xh
 464
 465         /* Negate y.  */
 466         slli    a11, a6, 11     /* a11 = 0x80000000 */
 467         xor     yh, yh, a11
 468
 469 .Lsub_subx:
 470         sub     xl, yl, xl
 471         sub     xh, yh, xh
 472         bgeu    yl, xl, 1f      /* a low-word difference above yl means the subtract wrapped */
 473         addi    xh, xh, -1      /* take the borrow from the high word */
 474 1:
 475         /* Subtract the leftover bits in a9 from zero and propagate any
 476            borrow from xh/xl.  */
 477         neg     a9, a9
 478         beqz    a9, 1f
 479         addi    a5, xh, -1      /* a5 is scratch here; it aliases yh or yl, which are no longer needed */
 480         moveqz  xh, a5, xl      /* borrow from xh only when xl is zero */
 481         addi    xl, xl, -1
 482 1:
 483         /* Check if the subtract underflowed into the exponent.  */
 484         extui   a10, xh, 20, 11
 485         bne     a10, a8, .Lsub_borrow
 486
 487 .Lsub_round:
 488         /* Round up if the leftover fraction is >= 1/2.  */
 489         bgez    a9, 1f          /* msb of a9 clear: leftover is below 1/2, nothing to do */
 490         addi    xl, xl, 1
 491         beqz    xl, .Lsub_roundcarry
 492
 493         /* Check if the leftover fraction is exactly 1/2.  */
 494         slli    a9, a9, 1       /* drop the 1/2 bit; zero means nothing lay below it */
 495         beqz    a9, .Lsub_exactlyhalf
 496 1:      leaf_return
 497
 498 .Lsub_xexpzero:
 499         /* Same as "yexpzero".  */
 500         slli    xh, xh, 12      /* clear the top 12 bits (sign + exponent) of xh */
 501         srli    xh, xh, 12
 502         bnone   yh, a6, .Lsub_xexpdiff
 503         addi    a7, a7, 1
 504         j       .Lsub_xexpdiff
 505
 506 .Lsub_bigshiftx:
 507         /* Mostly the same thing as "bigshifty", but with the sign bit of the
 508            shifted value set so that the subsequent subtraction flips the
 509            sign of y.  */
 510         bgeui   a10, 64, .Lsub_returny
 511
 512         ssr     a10
 513         sll     a11, xl         /* lost bits shifted out of xl */
 514         src     a9, xh, xl
 515         srl     xl, xh
 516         slli    xh, a6, 11      /* set sign bit of xh */
 517         beqz    a11, .Lsub_subx
 518         or      a9, a9, a10     /* record the lost bits as a nonzero leftover */
 519         j       .Lsub_subx
 520
 521 .Lsub_returny:
 522         /* Negate and return y.  */
 523         slli    a7, a6, 11      /* a7 = 0x80000000 */
 524         xor     xh, yh, a7
 525         mov     xl, yl
 526         leaf_return
 527
 528 .Lsub_borrow:
 529         /* The subtraction has underflowed into the exponent field, so the
 530            value needs to be renormalized.  Shift the mantissa left as
 531            needed to remove any leading zeros and adjust the exponent
 532            accordingly.  If the exponent is not large enough to remove
 533            all the leading zeros, the result will be a subnormal value.  */
 534
 535         slli    a8, xh, 12      /* a8 = mantissa bits of xh, left-justified */
 536         beqz    a8, .Lsub_xhzero
 537         do_nsau a6, a8, a7, a11 /* do_nsau is defined outside this chunk; presumably a6 = leading-zero count of a8 with a7/a11 as scratch -- confirm */
 538         srli    a8, a8, 12      /* a8 = xh with its sign/exponent bits cleared */
 539         bge     a6, a10, .Lsub_subnormal /* a10 = exponent field of xh, extracted just before branching here */
 540         addi    a6, a6, 1
 541
 542 .Lsub_shift_lt32:
 543         /* Shift the mantissa (a8/xl/a9) left by a6.  */
 544         ssl     a6
 545         src     a8, a8, xl
 546         src     xl, xl, a9
 547         sll     a9, a9
 548
 549         /* Combine the shifted mantissa with the sign and exponent,
 550            decrementing the exponent by a6.  (The exponent has already
 551            been decremented by one due to the borrow from the subtraction,
 552            but adding the mantissa will increment the exponent by one.)  */
 553         srli    xh, xh, 20      /* xh = sign/exponent field only */
 554         sub     xh, xh, a6
 555         slli    xh, xh, 20
 556         add     xh, xh, a8
 557         j       .Lsub_round
 558
 559 .Lsub_exactlyhalf:
 560         /* Round down to the nearest even value.  */
 561         srli    xl, xl, 1       /* clear bit 0 of xl */
 562         slli    xl, xl, 1
 563         leaf_return
 564
 565 .Lsub_roundcarry:
 566         /* xl is always zero when the rounding increment overflows, so
 567            there's no need to round it to an even value.  */
 568         addi    xh, xh, 1
 569         /* Overflow to the exponent is OK.  */
 570         leaf_return
 571
 572 .Lsub_xhzero:
 573         /* When normalizing the result, all the mantissa bits in the high
 574            word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
 575         do_nsau a6, xl, a7, a11
 576         addi    a6, a6, 21
 577         blt     a10, a6, .Lsub_subnormal
 578
 579 .Lsub_normalize_shift:
 580         bltui   a6, 32, .Lsub_shift_lt32
 581
 582         ssl     a6              /* shift of 32 or more: SAR takes only the low 5 bits of a6 */
 583         src     a8, xl, a9      /* the high mantissa word now comes from xl/a9 */
 584         sll     xl, a9
 585         movi    a9, 0
 586
 587         srli    xh, xh, 20      /* rebuild the sign/exponent as in .Lsub_shift_lt32 */
 588         sub     xh, xh, a6
 589         slli    xh, xh, 20
 590         add     xh, xh, a8
 591         j       .Lsub_round
 592
 593 .Lsub_subnormal:
 594         /* The exponent is too small to shift away all the leading zeros.
 595            Set a6 to the current exponent (which has already been
 596            decremented by the borrow) so that the exponent of the result
 597            will be zero.  Do not add 1 to a6 in this case, because: (1)
 598            adding the mantissa will not increment the exponent, so there is
 599            no need to subtract anything extra from the exponent to
 600            compensate, and (2) the effective exponent of a subnormal is 1
 601            not 0 so the shift amount must be 1 smaller than normal. */
 602         mov     a6, a10
 603         j       .Lsub_normalize_shift
 604
 605 #endif /* L_addsubdf3 */
 606
 607 #ifdef L_muldf3
 608
 609         /* Multiplication */
 610 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
 611 #define XCHAL_NO_MUL 1
 612 #endif
 613
 614 __muldf3_aux:                   /* out-of-line paths of __muldf3: a6 == 0x7ff00000, a7 == xh ^ yh, a8/a9 == exponents of x/y */
 615
 616         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
 617            (This code is placed before the start of the function just to
 618            keep it in range of the limited branch displacements.)  */
 619
 620 .Lmul_xexpzero:
 621         /* Clear the sign bit of x.  */
 622         slli    xh, xh, 1
 623         srli    xh, xh, 1
 624
 625         /* If x is zero, return zero.  */
 626         or      a10, xh, xl
 627         beqz    a10, .Lmul_return_zero
 628
 629         /* Normalize x.  Adjust the exponent in a8.  */
 630         beqz    xh, .Lmul_xh_zero
 631         do_nsau a10, xh, a11, a12 /* do_nsau is defined outside this chunk; presumably a10 = leading-zero count of xh -- confirm */
 632         addi    a10, a10, -11   /* a10 = left shift applied to the mantissa below */
 633         ssl     a10
 634         src     xh, xh, xl
 635         sll     xl, xl
 636         movi    a8, 1
 637         sub     a8, a8, a10     /* a8 = 1 - shift count */
 638         j       .Lmul_xnormalized
 639 .Lmul_xh_zero:
 640         do_nsau a10, xl, a11, a12
 641         addi    a10, a10, -11
 642         movi    a8, -31
 643         sub     a8, a8, a10     /* a8 = -31 - a10 */
 644         ssl     a10
 645         bltz    a10, .Lmul_xl_srl /* negative a10: xl must be split across xh/xl instead */
 646         sll     xh, xl
 647         movi    xl, 0
 648         j       .Lmul_xnormalized
 649 .Lmul_xl_srl:
 650         srl     xh, xl
 651         sll     xl, xl
 652         j       .Lmul_xnormalized
 653
 654 .Lmul_yexpzero:
 655         /* Clear the sign bit of y.  */
 656         slli    yh, yh, 1
 657         srli    yh, yh, 1
 658
 659         /* If y is zero, return zero.  */
 660         or      a10, yh, yl
 661         beqz    a10, .Lmul_return_zero
 662
 663         /* Normalize y.  Adjust the exponent in a9.  */
 664         beqz    yh, .Lmul_yh_zero
 665         do_nsau a10, yh, a11, a12 /* same sequence as for x above, applied to yh/yl and a9 */
 666         addi    a10, a10, -11
 667         ssl     a10
 668         src     yh, yh, yl
 669         sll     yl, yl
 670         movi    a9, 1
 671         sub     a9, a9, a10     /* a9 = 1 - shift count */
 672         j       .Lmul_ynormalized
 673 .Lmul_yh_zero:
 674         do_nsau a10, yl, a11, a12
 675         addi    a10, a10, -11
 676         movi    a9, -31
 677         sub     a9, a9, a10     /* a9 = -31 - a10 */
 678         ssl     a10
 679         bltz    a10, .Lmul_yl_srl /* negative a10: yl must be split across yh/yl instead */
 680         sll     yh, yl
 681         movi    yl, 0
 682         j       .Lmul_ynormalized
 683 .Lmul_yl_srl:
 684         srl     yh, yl
 685         sll     yl, yl
 686         j       .Lmul_ynormalized
 687
 688 .Lmul_return_zero:
 689         /* Return zero with the appropriate sign bit.  */
 690         srli    xh, a7, 31      /* a7 = xh ^ yh from __muldf3; keep only its bit 31 */
 691         slli    xh, xh, 31
 692         movi    xl, 0
 693         j       .Lmul_done      /* .Lmul_done lies outside this chunk; presumably the common exit -- confirm */
 694
 695 .Lmul_xnan_or_inf:
 696         /* If y is zero, return NaN.  */
 697         bnez    yl, 1f
 698         slli    a8, yh, 1       /* ignore the sign bit of y when testing for zero */
 699         bnez    a8, 1f
 700         movi    a4, 0x80000     /* make it a quiet NaN */
 701         or      xh, xh, a4
 702         j       .Lmul_done
 703 1:
 704         /* If y is NaN, return y.  */
 705         bnall   yh, a6, .Lmul_returnx
 706         slli    a8, yh, 12      /* a8 == 0 below only when the whole mantissa of y is zero */
 707         or      a8, a8, yl
 708         beqz    a8, .Lmul_returnx
 709
 710 .Lmul_returny:
 711         mov     xh, yh
 712         mov     xl, yl
 713
 714 .Lmul_returnx:
 715         /* Set the sign bit and return.  */
 716         extui   a7, a7, 31, 1   /* a7 = sign of the product (0 or 1) */
 717         slli    xh, xh, 1       /* drop the old sign bit of xh */
 718         ssai    1
 719         src     xh, a7, xh      /* shift back right by one, bringing a7 in as bit 31 */
 720         j       .Lmul_done
 721
 722 .Lmul_ynan_or_inf:
 723         /* If x is zero, return NaN.  */
 724         bnez    xl, .Lmul_returny
 725         slli    a8, xh, 1       /* ignore the sign bit of x when testing for zero */
 726         bnez    a8, .Lmul_returny
 727         movi    a7, 0x80000     /* make it a quiet NaN */
 728         or      xh, yh, a7
 729         j       .Lmul_done
 730
 731         .align  4
 732         .global __muldf3
 733         .type   __muldf3, @function
 734 __muldf3:
 735 #if __XTENSA_CALL0_ABI__
 736         leaf_entry sp, 32
 737         addi    sp, sp, -32
 738         s32i    a12, sp, 16
 739         s32i    a13, sp, 20
 740         s32i    a14, sp, 24
 741         s32i    a15, sp, 28
 742 #elif XCHAL_NO_MUL
 743         /* This is not really a leaf function; allocate enough stack space
 744            to allow CALL12s to a helper function.  */
 745         leaf_entry sp, 64
 746 #else
 747         leaf_entry sp, 32
 748 #endif
 749         movi    a6, 0x7ff00000
 750
 751         /* Get the sign of the result.  */
 752         xor     a7, xh, yh
 753
 754         /* Check for NaN and infinity.  */
 755         ball    xh, a6, .Lmul_xnan_or_inf
 756         ball    yh, a6, .Lmul_ynan_or_inf
 757
 758         /* Extract the exponents.  */
 759         extui   a8, xh, 20, 11
 760         extui   a9, yh, 20, 11
 761
 762         beqz    a8, .Lmul_xexpzero
 763 .Lmul_xnormalized:
 764         beqz    a9, .Lmul_yexpzero
 765 .Lmul_ynormalized:
 766
 767         /* Add the exponents.  */
 768         add     a8, a8, a9
 769
 770         /* Replace sign/exponent fields with explicit "1.0".  */
 771         movi    a10, 0x1fffff
 772         or      xh, xh, a6
 773         and     xh, xh, a10
 774         or      yh, yh, a6
 775         and     yh, yh, a10
 776
 777         /* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
 778            The least-significant word of the result is thrown away except
 779            that if it is nonzero, the lsb of a6 is set to 1.  */
 780 #if XCHAL_HAVE_MUL32_HIGH
 781
 782         /* Compute a6 with any carry-outs in a10.  */
 783         movi    a10, 0
 784         mull    a6, xl, yh
 785         mull    a11, xh, yl
 786         add     a6, a6, a11
 787         bgeu    a6, a11, 1f
 788         addi    a10, a10, 1
 789 1:
 790         muluh   a11, xl, yl
 791         add     a6, a6, a11
 792         bgeu    a6, a11, 1f
 793         addi    a10, a10, 1
 794 1:
 795         /* If the low word of the result is nonzero, set the lsb of a6.  */
 796         mull    a11, xl, yl
 797         beqz    a11, 1f
 798         movi    a9, 1
 799         or      a6, a6, a9
 800 1:
 801         /* Compute xl with any carry-outs in a9.  */
 802         movi    a9, 0
 803         mull    a11, xh, yh
 804         add     a10, a10, a11
 805         bgeu    a10, a11, 1f
 806         addi    a9, a9, 1
 807 1:
 808         muluh   a11, xh, yl
 809         add     a10, a10, a11
 810         bgeu    a10, a11, 1f
 811         addi    a9, a9, 1
 812 1:
 813         muluh   xl, xl, yh
 814         add     xl, xl, a10
 815         bgeu    xl, a10, 1f
 816         addi    a9, a9, 1
 817 1:
 818         /* Compute xh.  */
 819         muluh   xh, xh, yh
 820         add     xh, xh, a9
 821
 822 #else /* ! XCHAL_HAVE_MUL32_HIGH */
 823
 824         /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
 825            products.  These partial products are:
 826
 827                 0 xll * yll
 828
 829                 1 xll * ylh
 830                 2 xlh * yll
 831
 832                 3 xll * yhl
 833                 4 xlh * ylh
 834                 5 xhl * yll
 835
 836                 6 xll * yhh
 837                 7 xlh * yhl
 838                 8 xhl * ylh
 839                 9 xhh * yll
 840
 841                 10 xlh * yhh
 842                 11 xhl * yhl
 843                 12 xhh * ylh
 844
 845                 13 xhl * yhh
 846                 14 xhh * yhl
 847
 848                 15 xhh * yhh
 849
 850            where the input chunks are (hh, hl, lh, ll).  If using the Mul16
 851            or Mul32 multiplier options, these input chunks must be stored in
 852            separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
 853            that the inputs come from either half of the registers, so there
 854            is no need to shift them out ahead of time.  If there is no
 855            multiply hardware, the 16-bit chunks can be extracted when setting
 856            up the arguments to the separate multiply function.  */
 857
 858         /* Save a7 since it is needed to hold a temporary value.  */
 859         s32i    a7, sp, 4
 860 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
 861         /* Calling a separate multiply function will clobber a0 and requires
 862            use of a8 as a temporary, so save those values now.  (The function
 863            uses a custom ABI so nothing else needs to be saved.)  */
 864         s32i    a0, sp, 0
 865         s32i    a8, sp, 8
 866 #endif
 867
 868 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
 869
 870 #define xlh a12
 871 #define ylh a13
 872 #define xhh a14
 873 #define yhh a15
 874
 875         /* Get the high halves of the inputs into registers.  */
 876         srli    xlh, xl, 16
 877         srli    ylh, yl, 16
 878         srli    xhh, xh, 16
 879         srli    yhh, yh, 16
 880
 881 #define xll xl
 882 #define yll yl
 883 #define xhl xh
 884 #define yhl yh
 885
 886 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
 887         /* Clear the high halves of the inputs.  This does not matter
 888            for MUL16 because the high bits are ignored.  */
 889         extui   xl, xl, 0, 16
 890         extui   xh, xh, 0, 16
 891         extui   yl, yl, 0, 16
 892         extui   yh, yh, 0, 16
 893 #endif
 894 #endif /* MUL16 || MUL32 */
 895
 896
 897 #if XCHAL_HAVE_MUL16
 898
 899 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 900         mul16u  dst, xreg ## xhalf, yreg ## yhalf
 901
 902 #elif XCHAL_HAVE_MUL32
 903
 904 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 905         mull    dst, xreg ## xhalf, yreg ## yhalf
 906
 907 #elif XCHAL_HAVE_MAC16
 908
 909 /* The preprocessor insists on inserting a space when concatenating after
 910    a period in the definition of do_mul below.  These macros are a workaround
 911    using underscores instead of periods when doing the concatenation.  */
 912 #define umul_aa_ll umul.aa.ll
 913 #define umul_aa_lh umul.aa.lh
 914 #define umul_aa_hl umul.aa.hl
 915 #define umul_aa_hh umul.aa.hh
 916
 917 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 918         umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
 919         rsr     dst, ACCLO
 920
 921 #else /* no multiply hardware */
 922
 923 #define set_arg_l(dst, src) \
 924         extui   dst, src, 0, 16
 925 #define set_arg_h(dst, src) \
 926         srli    dst, src, 16
 927
 928 #if __XTENSA_CALL0_ABI__
 929 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 930         set_arg_ ## xhalf (a13, xreg); \
 931         set_arg_ ## yhalf (a14, yreg); \
 932         call0   .Lmul_mulsi3; \
 933         mov     dst, a12
 934 #else
 935 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 936         set_arg_ ## xhalf (a14, xreg); \
 937         set_arg_ ## yhalf (a15, yreg); \
 938         call12  .Lmul_mulsi3; \
 939         mov     dst, a14
 940 #endif /* __XTENSA_CALL0_ABI__ */
 941
 942 #endif /* no multiply hardware */
 943
 944         /* Add pp1 and pp2 into a10 with carry-out in a9.  */
 945         do_mul(a10, xl, l, yl, h)       /* pp 1 */
 946         do_mul(a11, xl, h, yl, l)       /* pp 2 */
 947         movi    a9, 0
 948         add     a10, a10, a11
 949         bgeu    a10, a11, 1f
 950         addi    a9, a9, 1
 951 1:
 952         /* Initialize a6 with a9/a10 shifted into position.  Note that
 953            this value can be safely incremented without any carry-outs.  */
 954         ssai    16
 955         src     a6, a9, a10
 956
 957         /* Compute the low word into a10.  */
 958         do_mul(a11, xl, l, yl, l)       /* pp 0 */
 959         sll     a10, a10
 960         add     a10, a10, a11
 961         bgeu    a10, a11, 1f
 962         addi    a6, a6, 1
 963 1:
 964         /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
 965            This is good enough to determine the low half of a6, so that any
 966            nonzero bits from the low word of the result can be collapsed
 967            into a6, freeing up a register.  */
 968         movi    a9, 0
 969         do_mul(a11, xl, l, yh, l)       /* pp 3 */
 970         add     a6, a6, a11
 971         bgeu    a6, a11, 1f
 972         addi    a9, a9, 1
 973 1:
 974         do_mul(a11, xl, h, yl, h)       /* pp 4 */
 975         add     a6, a6, a11
 976         bgeu    a6, a11, 1f
 977         addi    a9, a9, 1
 978 1:
 979         do_mul(a11, xh, l, yl, l)       /* pp 5 */
 980         add     a6, a6, a11
 981         bgeu    a6, a11, 1f
 982         addi    a9, a9, 1
 983 1:
 984         /* Collapse any nonzero bits from the low word into a6.  */
 985         beqz    a10, 1f
 986         movi    a11, 1
 987         or      a6, a6, a11
 988 1:
 989         /* Add pp6-9 into a11 with carry-outs in a10.  */
 990         do_mul(a7, xl, l, yh, h)        /* pp 6 */
 991         do_mul(a11, xh, h, yl, l)       /* pp 9 */
 992         movi    a10, 0
 993         add     a11, a11, a7
 994         bgeu    a11, a7, 1f
 995         addi    a10, a10, 1
 996 1:
 997         do_mul(a7, xl, h, yh, l)        /* pp 7 */
 998         add     a11, a11, a7
 999         bgeu    a11, a7, 1f
1000         addi    a10, a10, 1
1001 1:
1002         do_mul(a7, xh, l, yl, h)        /* pp 8 */
1003         add     a11, a11, a7
1004         bgeu    a11, a7, 1f
1005         addi    a10, a10, 1
1006 1:
1007         /* Shift a10/a11 into position, and add low half of a11 to a6.  */
1008         src     a10, a10, a11
1009         add     a10, a10, a9
1010         sll     a11, a11
1011         add     a6, a6, a11
1012         bgeu    a6, a11, 1f
1013         addi    a10, a10, 1
1014 1:
1015         /* Add pp10-12 into xl with carry-outs in a9.  */
1016         movi    a9, 0
1017         do_mul(xl, xl, h, yh, h)        /* pp 10 */
1018         add     xl, xl, a10
1019         bgeu    xl, a10, 1f
1020         addi    a9, a9, 1
1021 1:
1022         do_mul(a10, xh, l, yh, l)       /* pp 11 */
1023         add     xl, xl, a10
1024         bgeu    xl, a10, 1f
1025         addi    a9, a9, 1
1026 1:
1027         do_mul(a10, xh, h, yl, h)       /* pp 12 */
1028         add     xl, xl, a10
1029         bgeu    xl, a10, 1f
1030         addi    a9, a9, 1
1031 1:
1032         /* Add pp13-14 into a11 with carry-outs in a10.  */
1033         do_mul(a11, xh, l, yh, h)       /* pp 13 */
1034         do_mul(a7, xh, h, yh, l)        /* pp 14 */
1035         movi    a10, 0
1036         add     a11, a11, a7
1037         bgeu    a11, a7, 1f
1038         addi    a10, a10, 1
1039 1:
1040         /* Shift a10/a11 into position, and add low half of a11 to xl.  */
1041         src     a10, a10, a11
1042         add     a10, a10, a9
1043         sll     a11, a11
1044         add     xl, xl, a11
1045         bgeu    xl, a11, 1f
1046         addi    a10, a10, 1
1047 1:
1048         /* Compute xh.  */
1049         do_mul(xh, xh, h, yh, h)        /* pp 15 */
1050         add     xh, xh, a10
1051
1052         /* Restore values saved on the stack during the multiplication.  */
1053         l32i    a7, sp, 4
1054 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
1055         l32i    a0, sp, 0
1056         l32i    a8, sp, 8
1057 #endif
1058 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
1059
1060         /* Shift left by 12 bits, unless there was a carry-out from the
1061            multiply, in which case, shift by 11 bits and increment the
1062            exponent.  Note: It is convenient to use the constant 0x3ff
1063            instead of 0x400 when removing the extra exponent bias (so that
1064            it is easy to construct 0x7fe for the overflow check).  Reverse
1065            the logic here to decrement the exponent sum by one unless there
1066            was a carry-out.  */
1067         movi    a4, 11
1068         srli    a5, xh, 21 - 12
1069         bnez    a5, 1f
1070         addi    a4, a4, 1
1071         addi    a8, a8, -1
1072 1:      ssl     a4
1073         src     xh, xh, xl
1074         src     xl, xl, a6
1075         sll     a6, a6
1076
1077         /* Subtract the extra bias from the exponent sum (plus one to account
1078            for the explicit "1.0" of the mantissa that will be added to the
1079            exponent in the final result).  */
1080         movi    a4, 0x3ff
1081         sub     a8, a8, a4
1082
1083         /* Check for over/underflow.  The value in a8 is one less than the
1084            final exponent, so values in the range 0..7fd are OK here.  */
1085         slli    a4, a4, 1       /* 0x7fe */
1086         bgeu    a8, a4, .Lmul_overflow
1087
1088 .Lmul_round:
1089         /* Round.  */
1090         bgez    a6, .Lmul_rounded
1091         addi    xl, xl, 1
1092         beqz    xl, .Lmul_roundcarry
1093         slli    a6, a6, 1
1094         beqz    a6, .Lmul_exactlyhalf
1095
1096 .Lmul_rounded:
1097         /* Add the exponent to the mantissa.  */
1098         slli    a8, a8, 20
1099         add     xh, xh, a8
1100
1101 .Lmul_addsign:
1102         /* Add the sign bit.  */
1103         srli    a7, a7, 31
1104         slli    a7, a7, 31
1105         or      xh, xh, a7
1106
1107 .Lmul_done:
1108 #if __XTENSA_CALL0_ABI__
1109         l32i    a12, sp, 16
1110         l32i    a13, sp, 20
1111         l32i    a14, sp, 24
1112         l32i    a15, sp, 28
1113         addi    sp, sp, 32
1114 #endif
1115         leaf_return
1116
1117 .Lmul_exactlyhalf:
1118         /* Round down to the nearest even value.  */
1119         srli    xl, xl, 1
1120         slli    xl, xl, 1
1121         j       .Lmul_rounded
1122
1123 .Lmul_roundcarry:
1124         /* xl is always zero when the rounding increment overflows, so
1125            there's no need to round it to an even value.  */
1126         addi    xh, xh, 1
1127         /* Overflow is OK -- it will be added to the exponent.  */
1128         j       .Lmul_rounded
1129
1130 .Lmul_overflow:
1131         bltz    a8, .Lmul_underflow
1132         /* Return +/- Infinity.  */
1133         addi    a8, a4, 1       /* 0x7ff */
1134         slli    xh, a8, 20
1135         movi    xl, 0
1136         j       .Lmul_addsign
1137
1138 .Lmul_underflow:
1139         /* Create a subnormal value, where the exponent field contains zero,
1140            but the effective exponent is 1.  The value of a8 is one less than
1141            the actual exponent, so just negate it to get the shift amount.  */
1142         neg     a8, a8
1143         mov     a9, a6
1144         ssr     a8
1145         bgeui   a8, 32, .Lmul_bigshift
1146
1147         /* Shift xh/xl right.  Any bits that are shifted out of xl are saved
1148            in a6 (combined with the shifted-out bits currently in a6) for
1149            rounding the result.  */
1150         sll     a6, xl
1151         src     xl, xh, xl
1152         srl     xh, xh
1153         j       1f
1154
1155 .Lmul_bigshift:
1156         bgeui   a8, 64, .Lmul_flush_to_zero
1157         sll     a10, xl         /* lost bits shifted out of xl */
1158         src     a6, xh, xl
1159         srl     xl, xh
1160         movi    xh, 0
1161         or      a9, a9, a10
1162
1163         /* Set the exponent to zero.  */
1164 1:      movi    a8, 0
1165
1166         /* Pack any nonzero bits shifted out into a6.  */
1167         beqz    a9, .Lmul_round
1168         movi    a9, 1
1169         or      a6, a6, a9
1170         j       .Lmul_round
1171
1172 .Lmul_flush_to_zero:
1173         /* Return zero with the appropriate sign bit.  */
1174         srli    xh, a7, 31
1175         slli    xh, xh, 31
1176         movi    xl, 0
1177         j       .Lmul_done
1178
1179 #if XCHAL_NO_MUL
1180
1181         /* For Xtensa processors with no multiply hardware, this simplified
1182            version of _mulsi3 is used for multiplying 16-bit chunks of
1183            the floating-point mantissas.  When using CALL0, this function
1184            uses a custom ABI: the inputs are passed in a13 and a14, the
1185            result is returned in a12, and a8 and a15 are clobbered.  */
1186         .align  4
1187 .Lmul_mulsi3:
1188         leaf_entry sp, 16
1189         .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2     /* dst = src1 * src2; src1, src2, tmp1 and tmp2 are overwritten */
1190         movi    \dst, 0         /* running product */
1191 1:      add     \tmp1, \src2, \dst      /* loop head: candidate dst + src2 */
1192         extui   \tmp2, \src1, 0, 1      /* bit 0 of src1 */
1193         movnez  \dst, \tmp1, \tmp2      /* keep the candidate only if that bit is set */
1194
1195         do_addx2 \tmp1, \src2, \dst, \tmp1     /* presumably tmp1 = dst + 2 * src2 (macro defined outside this view) */
1196         extui   \tmp2, \src1, 1, 1      /* bit 1 of src1 */
1197         movnez  \dst, \tmp1, \tmp2
1198
1199         do_addx4 \tmp1, \src2, \dst, \tmp1     /* presumably tmp1 = dst + 4 * src2 (macro defined outside this view) */
1200         extui   \tmp2, \src1, 2, 1      /* bit 2 of src1 */
1201         movnez  \dst, \tmp1, \tmp2
1202
1203         do_addx8 \tmp1, \src2, \dst, \tmp1     /* presumably tmp1 = dst + 8 * src2 (macro defined outside this view) */
1204         extui   \tmp2, \src1, 3, 1      /* bit 3 of src1 */
1205         movnez  \dst, \tmp1, \tmp2
1206
1207         srli    \src1, \src1, 4         /* drop the four bits just handled */
1208         slli    \src2, \src2, 4         /* scale src2 to match the next four bits */
1209         bnez    \src1, 1b               /* repeat until no set bits remain in src1 */
1210         .endm
1211 #if __XTENSA_CALL0_ABI__
1212         mul_mulsi3_body a12, a13, a14, a15, a8  /* custom ABI: a12 = a13 * a14, scratch a15 and a8 */
1213 #else
1214         /* The result will be written into a2, so save that argument in a4.  */
1215         mov     a4, a2
1216         mul_mulsi3_body a2, a4, a3, a5, a6      /* a2 = a4 * a3, scratch a5 and a6 */
1217 #endif
1218         leaf_return
1219 #endif /* XCHAL_NO_MUL */
1220 #endif /* L_muldf3 */
1221
1222 #ifdef L_divdf3
1223
1224         /* Division */
1225 __divdf3_aux:
1226
1227         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
1228            (This code is placed before the start of the function just to
1229            keep it in range of the limited branch displacements.)  */
1230
1231 .Ldiv_yexpzero:        /* entered from __divdf3 when y's exponent field (a9) is zero */
1232         /* Clear the sign bit of y.  */
1233         slli    yh, yh, 1
1234         srli    yh, yh, 1
1235
1236         /* Check for division by zero.  */
1237         or      a10, yh, yl
1238         beqz    a10, .Ldiv_yzero
1239
1240         /* Normalize y.  Adjust the exponent in a9.  */
1241         beqz    yh, .Ldiv_yh_zero
1242         do_nsau a10, yh, a11, a9        /* presumably a10 = leading-zero count of yh (macro defined outside this view) */
1243         addi    a10, a10, -11   /* left-shift count that puts the leading one at bit 20, if a10 was the leading-zero count */
1244         ssl     a10
1245         src     yh, yh, yl      /* yh:yl <<= a10 */
1246         sll     yl, yl
1247         movi    a9, 1
1248         sub     a9, a9, a10     /* a9 = 1 - shift count */
1249         j       .Ldiv_ynormalized
1250 .Ldiv_yh_zero:         /* yh == 0: every significant bit of y is in yl */
1251         do_nsau a10, yl, a11, a9        /* presumably a10 = leading-zero count of yl (macro defined outside this view) */
1252         addi    a10, a10, -11
1253         movi    a9, -31
1254         sub     a9, a9, a10     /* a9 = -31 - a10 */
1255         ssl     a10
1256         bltz    a10, .Ldiv_yl_srl       /* negative count: the bits must move right instead of left */
1257         sll     yh, yl          /* yh = yl << a10 */
1258         movi    yl, 0
1259         j       .Ldiv_ynormalized
1260 .Ldiv_yl_srl:
1261         srl     yh, yl          /* upper part of yl goes into yh */
1262         sll     yl, yl          /* the remaining bits stay in yl, moved up */
1263         j       .Ldiv_ynormalized
1264
1265 .Ldiv_yzero:
1266         /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
1267         slli    xh, xh, 1       /* drop the sign bit of x */
1268         srli    xh, xh, 1
1269         or      xl, xl, xh      /* xl != 0 exactly when x is nonzero */
1270         srli    xh, a7, 31      /* result sign taken from a7 (x sign xor y sign) */
1271         slli    xh, xh, 31
1272         or      xh, xh, a6      /* a6 = 0x7ff00000: exponent field all ones */
1273         bnez    xl, 1f          /* x != 0: leave the result as infinity */
1274         movi    a4, 0x80000     /* make it a quiet NaN */
1275         or      xh, xh, a4
1276 1:      movi    xl, 0
1277         leaf_return
1278
1279 .Ldiv_xexpzero:        /* entered from __divdf3 when x's exponent field (a8) is zero */
1280         /* Clear the sign bit of x.  */
1281         slli    xh, xh, 1
1282         srli    xh, xh, 1
1283
1284         /* If x is zero, return zero.  */
1285         or      a10, xh, xl
1286         beqz    a10, .Ldiv_return_zero
1287
1288         /* Normalize x.  Adjust the exponent in a8.  */
1289         beqz    xh, .Ldiv_xh_zero
1290         do_nsau a10, xh, a11, a8        /* presumably a10 = leading-zero count of xh (macro defined outside this view) */
1291         addi    a10, a10, -11   /* left-shift count that puts the leading one at bit 20, if a10 was the leading-zero count */
1292         ssl     a10
1293         src     xh, xh, xl      /* xh:xl <<= a10 */
1294         sll     xl, xl
1295         movi    a8, 1
1296         sub     a8, a8, a10     /* a8 = 1 - shift count */
1297         j       .Ldiv_xnormalized
1298 .Ldiv_xh_zero:         /* xh == 0: every significant bit of x is in xl */
1299         do_nsau a10, xl, a11, a8        /* presumably a10 = leading-zero count of xl (macro defined outside this view) */
1300         addi    a10, a10, -11
1301         movi    a8, -31
1302         sub     a8, a8, a10     /* a8 = -31 - a10 */
1303         ssl     a10
1304         bltz    a10, .Ldiv_xl_srl       /* negative count: the bits must move right instead of left */
1305         sll     xh, xl          /* xh = xl << a10 */
1306         movi    xl, 0
1307         j       .Ldiv_xnormalized
1308 .Ldiv_xl_srl:
1309         srl     xh, xl          /* upper part of xl goes into xh */
1310         sll     xl, xl          /* the remaining bits stay in xl, moved up */
1311         j       .Ldiv_xnormalized
1312
1313 .Ldiv_return_zero:
1314         /* Return zero with the appropriate sign bit.  */
1315         srli    xh, a7, 31
1316         slli    xh, xh, 31
1317         movi    xl, 0
1318         leaf_return
1319
1320 .Ldiv_xnan_or_inf:     /* x has an all-ones exponent; the result is x with an adjusted sign */
1321         /* Set the sign bit of the result.  */
1322         srli    a7, yh, 31
1323         slli    a7, a7, 31
1324         xor     xh, xh, a7
1325         /* If y is NaN or Inf, return NaN.  */
1326         bnall   yh, a6, 1f      /* y's exponent is not all ones: return x unchanged apart from the sign */
1327         movi    a4, 0x80000     /* make it a quiet NaN */
1328         or      xh, xh, a4
1329 1:      leaf_return
1330
1331 .Ldiv_ynan_or_inf:     /* y has an all-ones exponent and x does not */
1332         /* If y is Infinity, return zero.  */
1333         slli    a8, yh, 12      /* a8 = mantissa bits of y, with sign and exponent shifted out */
1334         or      a8, a8, yl
1335         beqz    a8, .Ldiv_return_zero
1336         /* y is NaN; return it.  */
1337         mov     xh, yh
1338         mov     xl, yl
1339         leaf_return
1340
1341 .Ldiv_highequal1:      /* xh == yh in the pre-shift test of __divdf3: the low words decide */
1342         bltu    xl, yl, 2f      /* x < y: shift x left first (label 2 in __divdf3) */
1343         j       3f              /* x >= y: go straight to the first subtraction */
1344
1345         .align  4
1346         .global __divdf3
1347         .type   __divdf3, @function
1348 __divdf3:      /* double-precision x / y; x arrives in xh/xl, y in yh/yl, and the result is left in xh/xl */
1349         leaf_entry sp, 16
1350         movi    a6, 0x7ff00000  /* exponent-field mask, kept in a6 for the special-case code */
1351
1352         /* Get the sign of the result.  */
1353         xor     a7, xh, yh      /* only bit 31 of a7 is used later */
1354
1355         /* Check for NaN and infinity.  */
1356         ball    xh, a6, .Ldiv_xnan_or_inf
1357         ball    yh, a6, .Ldiv_ynan_or_inf
1358
1359         /* Extract the exponents.  */
1360         extui   a8, xh, 20, 11
1361         extui   a9, yh, 20, 11
1362
1363         beqz    a9, .Ldiv_yexpzero      /* y is zero or subnormal */
1364 .Ldiv_ynormalized:
1365         beqz    a8, .Ldiv_xexpzero      /* x is zero or subnormal */
1366 .Ldiv_xnormalized:
1367
1368         /* Subtract the exponents.  */
1369         sub     a8, a8, a9
1370
1371         /* Replace sign/exponent fields with explicit "1.0".  */
1372         movi    a10, 0x1fffff
1373         or      xh, xh, a6      /* sets bit 20 along with the rest of the exponent field */
1374         and     xh, xh, a10     /* keep bits 0..20 only */
1375         or      yh, yh, a6
1376         and     yh, yh, a10
1377
1378         /* Set SAR for left shift by one.  */
1379         ssai    (32 - 1)
1380
1381         /* The first digit of the mantissa division must be a one.
1382            Shift x (and adjust the exponent) as needed to make this true.  */
1383         bltu    yh, xh, 3f      /* x > y already */
1384         beq     yh, xh, .Ldiv_highequal1
1385 2:      src     xh, xh, xl      /* x < y: xh:xl <<= 1 ... */
1386         sll     xl, xl
1387         addi    a8, a8, -1      /* ... and lower the exponent difference to compensate */
1388 3:
1389         /* Do the first subtraction and shift.  */
1390         sub     xh, xh, yh
1391         bgeu    xl, yl, 1f
1392         addi    xh, xh, -1      /* borrow from the low-word subtraction */
1393 1:      sub     xl, xl, yl
1394         src     xh, xh, xl      /* remainder <<= 1 */
1395         sll     xl, xl
1396
1397         /* Put the quotient into a10/a11.  */
1398         movi    a10, 0
1399         movi    a11, 1          /* leading quotient digit is the one produced above */
1400
1401         /* Divide one bit at a time for 52 bits.  */
1402         movi    a9, 52
1403 #if XCHAL_HAVE_LOOPS
1404         loop    a9, .Ldiv_loopend
1405 #endif
1406 .Ldiv_loop:
1407         /* Shift the quotient << 1.  */
1408         src     a10, a10, a11
1409         sll     a11, a11
1410
1411         /* Is this digit a 0 or 1?  */
1412         bltu    xh, yh, 3f      /* remainder < y: digit is 0 */
1413         beq     xh, yh, .Ldiv_highequal2        /* high words equal: the low words decide */
1414
1415         /* Output a 1 and subtract.  */
1416 2:      addi    a11, a11, 1
1417         sub     xh, xh, yh
1418         bgeu    xl, yl, 1f
1419         addi    xh, xh, -1      /* borrow from the low-word subtraction */
1420 1:      sub     xl, xl, yl
1421
1422         /* Shift the dividend << 1.  */
1423 3:      src     xh, xh, xl
1424         sll     xl, xl
1425
1426 #if !XCHAL_HAVE_LOOPS
1427         addi    a9, a9, -1      /* software loop counter when the loop instruction is unavailable */
1428         bnez    a9, .Ldiv_loop
1429 #endif
1430 .Ldiv_loopend:
1431
1432         /* Add the exponent bias (less one to account for the explicit "1.0"
1433            of the mantissa that will be added to the exponent in the final
1434            result).  */
1435         movi    a9, 0x3fe
1436         add     a8, a8, a9
1437
1438         /* Check for over/underflow.  The value in a8 is one less than the
1439            final exponent, so values in the range 0..7fd are OK here.  */
1440         addmi   a9, a9, 0x400   /* 0x7fe */
1441         bgeu    a8, a9, .Ldiv_overflow  /* unsigned compare, so a negative a8 is caught here too */
1442
1443 .Ldiv_round:
1444         /* Round.  The remainder (<< 1) is in xh/xl.  */
1445         bltu    xh, yh, .Ldiv_rounded   /* below half of the divisor: keep the quotient */
1446         beq     xh, yh, .Ldiv_highequal3        /* high words equal: the low words decide */
1447 .Ldiv_roundup:
1448         addi    a11, a11, 1
1449         beqz    a11, .Ldiv_roundcarry   /* low word wrapped: carry into a10 */
1450
1451 .Ldiv_rounded:
1452         mov     xl, a11
1453         /* Add the exponent to the mantissa.  */
1454         slli    a8, a8, 20
1455         add     xh, a10, a8     /* the explicit one at bit 20 of a10 adds one to the exponent field */
1456
1457 .Ldiv_addsign:
1458         /* Add the sign bit.  */
1459         srli    a7, a7, 31
1460         slli    a7, a7, 31
1461         or      xh, xh, a7
1462         leaf_return
1463
1464 .Ldiv_highequal2:      /* out-of-line low-word compare for the digit test in the loop */
1465         bgeu    xl, yl, 2b      /* remainder >= y: digit is 1 */
1466         j       3b              /* remainder < y: digit is 0 */
1467
1468 .Ldiv_highequal3:      /* out-of-line low-word compare for the rounding test */
1469         bltu    xl, yl, .Ldiv_rounded
1470         bne     xl, yl, .Ldiv_roundup
1471
1472         /* Remainder is exactly half the divisor.  Round even.  */
1473         addi    a11, a11, 1
1474         beqz    a11, .Ldiv_roundcarry
1475         srli    a11, a11, 1     /* clear bit 0 so the result is even */
1476         slli    a11, a11, 1
1477         j       .Ldiv_rounded
1478
1479 .Ldiv_overflow:        /* a8 is outside 0..0x7fd: the sign of a8 tells overflow from underflow */
1480         bltz    a8, .Ldiv_underflow
1481         /* Return +/- Infinity.  */
1482         addi    a8, a9, 1       /* 0x7ff */
1483         slli    xh, a8, 20
1484         movi    xl, 0
1485         j       .Ldiv_addsign
1486
1487 .Ldiv_underflow:
1488         /* Create a subnormal value, where the exponent field contains zero,
1489            but the effective exponent is 1.  The value of a8 is one less than
1490            the actual exponent, so just negate it to get the shift amount.  */
1491         neg     a8, a8
1492         ssr     a8
1493         bgeui   a8, 32, .Ldiv_bigshift
1494
1495         /* Shift a10/a11 right.  Any bits that are shifted out of a11 are
1496            saved in a6 for rounding the result.  */
1497         sll     a6, a11
1498         src     a11, a10, a11
1499         srl     a10, a10
1500         j       1f
1501
1502 .Ldiv_bigshift:        /* shift count is 32 or more: a whole word moves down */
1503         bgeui   a8, 64, .Ldiv_flush_to_zero
1504         sll     a9, a11         /* lost bits shifted out of a11 */
1505         src     a6, a10, a11    /* bits just below the result, used for rounding */
1506         srl     a11, a10
1507         movi    a10, 0
1508         or      xl, xl, a9      /* fold the lost bits into the remainder so they count as nonzero */
1509
1510         /* Set the exponent to zero.  */
1511 1:      movi    a8, 0
1512
1513         /* Pack any nonzero remainder (in xh/xl) into a6.  */
1514         or      xh, xh, xl
1515         beqz    xh, 1f
1516         movi    a9, 1
1517         or      a6, a6, a9      /* set bit 0 of a6 to record the nonzero remainder */
1518
1519         /* Round a10/a11 based on the bits shifted out into a6.  */
1520 1:      bgez    a6, .Ldiv_rounded       /* bit 31 of a6 clear: below half, keep the quotient */
1521         addi    a11, a11, 1
1522         beqz    a11, .Ldiv_roundcarry
1523         slli    a6, a6, 1
1524         bnez    a6, .Ldiv_rounded       /* other bits set: above half, keep the increment */
1525         srli    a11, a11, 1     /* exactly half: clear bit 0 to round to even */
1526         slli    a11, a11, 1
1527         j       .Ldiv_rounded
1528
1529 .Ldiv_roundcarry:
1530         /* a11 is always zero when the rounding increment overflows, so
1531            there's no need to round it to an even value.  */
1532         addi    a10, a10, 1
1533         /* Overflow to the exponent field is OK.  */
1534         j       .Ldiv_rounded
1535
1536 .Ldiv_flush_to_zero:
1537         /* Return zero with the appropriate sign bit.  */
1538         srli    xh, a7, 31
1539         slli    xh, xh, 31
1540         movi    xl, 0
1541         leaf_return
1542
1543 #endif /* L_divdf3 */
1544
1545 #ifdef L_cmpdf2
1546
1547         /* Equal and Not Equal: a2 = 0 when x == y, 1 otherwise.  */
1548
1549         .align  4
1550         .global __eqdf2
1551         .global __nedf2
1552         .set    __nedf2, __eqdf2        /* __nedf2 is an alias for the same code */
1553         .type   __eqdf2, @function
1554 __eqdf2:
1555         leaf_entry sp, 16
1556         bne     xl, yl, 2f      /* low words differ: not equal */
1557         bne     xh, yh, 4f      /* high words differ: equal only if both values are zero */
1558
1559         /* The values are equal but NaN != NaN.  Check the exponent.  */
1560         movi    a6, 0x7ff00000
1561         ball    xh, a6, 3f      /* exponent all ones: Infinity or NaN */
1562
1563         /* Equal.  */
1564         movi    a2, 0
1565         leaf_return
1566
1567         /* Not equal.  */
1568 2:      movi    a2, 1
1569         leaf_return
1570
1571         /* Check if the mantissas are nonzero.  */
1572 3:      slli    a7, xh, 12      /* shift out the sign and exponent bits */
1573         or      a7, a7, xl
1574         j       5f
1575
1576         /* Check if x and y are zero with different signs.  */
1577 4:      or      a7, xh, yh
1578         slli    a7, a7, 1       /* shift out the sign bits */
1579         or      a7, a7, xl      /* xl == yl here */
1580
1581         /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1582            of x when exponent(x) = 0x7ff and x == y.  */
1583 5:      movi    a2, 0
1584         movi    a3, 1
1585         movnez  a2, a3, a7      /* a2 = 1 when a7 != 0 */
1586         leaf_return
1587
1588
1589         /* Greater Than: a2 = 0 if either input is a NaN; otherwise the result of .Lle_cmp (1 if x > y, else 0).  */
1590
1591         .align  4
1592         .global __gtdf2
1593         .type   __gtdf2, @function
1594 __gtdf2:
1595         leaf_entry sp, 16
1596         movi    a6, 0x7ff00000
1597         ball    xh, a6, 2f      /* x's exponent is all ones: Infinity or NaN */
1598 1:      bnall   yh, a6, .Lle_cmp        /* y's exponent is not all ones: ordinary compare */
1599
1600         /* Check if y is a NaN.  */
1601         slli    a7, yh, 12      /* shift out the sign and exponent bits */
1602         or      a7, a7, yl
1603         beqz    a7, .Lle_cmp    /* mantissa is zero: y is Infinity, which compares normally */
1604         movi    a2, 0
1605         leaf_return
1606
1607         /* Check if x is a NaN.  */
1608 2:      slli    a7, xh, 12      /* shift out the sign and exponent bits */
1609         or      a7, a7, xl
1610         beqz    a7, 1b          /* x is Infinity: go on to check y */
1611         movi    a2, 0
1612         leaf_return
1613
1614
1615         /* Less Than or Equal: a2 = 1 if either input is a NaN; otherwise 0 if x <= y and 1 if x > y.  */
1616
1617         .align  4
1618         .global __ledf2
1619         .type   __ledf2, @function
1620 __ledf2:
1621         leaf_entry sp, 16
1622         movi    a6, 0x7ff00000
1623         ball    xh, a6, 2f      /* x's exponent is all ones: Infinity or NaN */
1624 1:      bnall   yh, a6, .Lle_cmp        /* y's exponent is not all ones: ordinary compare */
1625
1626         /* Check if y is a NaN.  */
1627         slli    a7, yh, 12      /* shift out the sign and exponent bits */
1628         or      a7, a7, yl
1629         beqz    a7, .Lle_cmp    /* mantissa is zero: y is Infinity, which compares normally */
1630         movi    a2, 1
1631         leaf_return
1632
1633         /* Check if x is a NaN.  */
1634 2:      slli    a7, xh, 12      /* shift out the sign and exponent bits */
1635         or      a7, a7, xl
1636         beqz    a7, 1b          /* x is Infinity: go on to check y */
1637         movi    a2, 1
1638         leaf_return
1639
1640 .Lle_cmp:      /* shared with __gtdf2; neither input is a NaN here */
1641         /* Check if x and y have different signs.  */
1642         xor     a7, xh, yh
1643         bltz    a7, .Lle_diff_signs
1644
1645         /* Check if x is negative.  */
1646         bltz    xh, .Lle_xneg
1647
1648         /* Check if x <= y.  */
1649         bltu    xh, yh, 4f      /* both non-negative, so the bit patterns order like the values */
1650         bne     xh, yh, 5f
1651         bltu    yl, xl, 5f
1652 4:      movi    a2, 0           /* x <= y */
1653         leaf_return
1654
1655 .Lle_xneg:
1656         /* Check if y <= x.  */
1657         bltu    yh, xh, 4b      /* both negative: the larger bit pattern is the smaller value */
1658         bne     yh, xh, 5f
1659         bgeu    xl, yl, 4b
1660 5:      movi    a2, 1           /* x > y */
1661         leaf_return
1662
1663 .Lle_diff_signs:
1664         bltz    xh, 4b          /* x is negative and y is not: x <= y */
1665
1666         /* Check if both x and y are zero.  */
1667         or      a7, xh, yh
1668         slli    a7, a7, 1       /* shift out the sign bits */
1669         or      a7, a7, xl
1670         or      a7, a7, yl
1671         movi    a2, 1
1672         movi    a3, 0
1673         moveqz  a2, a3, a7      /* a2 = 0 when both are zero, since +0 <= -0 holds */
1674         leaf_return
1675
1676
1677         /* Greater Than or Equal: a2 = -1 if either input is a NaN; otherwise the result of .Llt_cmp (-1 if x < y, else 0).  */
1678
1679         .align  4
1680         .global __gedf2
1681         .type   __gedf2, @function
1682 __gedf2:
1683         leaf_entry sp, 16
1684         movi    a6, 0x7ff00000
1685         ball    xh, a6, 2f      /* x's exponent is all ones: Infinity or NaN */
1686 1:      bnall   yh, a6, .Llt_cmp        /* y's exponent is not all ones: ordinary compare */
1687
1688         /* Check if y is a NaN.  */
1689         slli    a7, yh, 12      /* shift out the sign and exponent bits */
1690         or      a7, a7, yl
1691         beqz    a7, .Llt_cmp    /* mantissa is zero: y is Infinity, which compares normally */
1692         movi    a2, -1
1693         leaf_return
1694
1695         /* Check if x is a NaN.  */
1696 2:      slli    a7, xh, 12      /* shift out the sign and exponent bits */
1697         or      a7, a7, xl
1698         beqz    a7, 1b          /* x is Infinity: go on to check y */
1699         movi    a2, -1
1700         leaf_return
1701
1702
1703         /* Less Than: a2 = 0 if either input is a NaN; otherwise -1 if x < y and 0 if x >= y.  */
1704
1705         .align  4
1706         .global __ltdf2
1707         .type   __ltdf2, @function
1708 __ltdf2:
1709         leaf_entry sp, 16
1710         movi    a6, 0x7ff00000
1711         ball    xh, a6, 2f      /* x's exponent is all ones: Infinity or NaN */
1712 1:      bnall   yh, a6, .Llt_cmp        /* y's exponent is not all ones: ordinary compare */
1713
1714         /* Check if y is a NaN.  */
1715         slli    a7, yh, 12      /* shift out the sign and exponent bits */
1716         or      a7, a7, yl
1717         beqz    a7, .Llt_cmp    /* mantissa is zero: y is Infinity, which compares normally */
1718         movi    a2, 0
1719         leaf_return
1720
1721         /* Check if x is a NaN.  */
1722 2:      slli    a7, xh, 12      /* shift out the sign and exponent bits */
1723         or      a7, a7, xl
1724         beqz    a7, 1b          /* x is Infinity: go on to check y */
1725         movi    a2, 0
1726         leaf_return
1727
1728 .Llt_cmp:      /* shared with __gedf2; neither input is a NaN here */
1729         /* Check if x and y have different signs.  */
1730         xor     a7, xh, yh
1731         bltz    a7, .Llt_diff_signs
1732
1733         /* Check if x is negative.  */
1734         bltz    xh, .Llt_xneg
1735
1736         /* Check if x < y.  */
1737         bltu    xh, yh, 4f      /* both non-negative, so the bit patterns order like the values */
1738         bne     xh, yh, 5f
1739         bgeu    xl, yl, 5f
1740 4:      movi    a2, -1          /* x < y */
1741         leaf_return
1742
1743 .Llt_xneg:
1744         /* Check if y < x.  */
1745         bltu    yh, xh, 4b      /* both negative: the larger bit pattern is the smaller value */
1746         bne     yh, xh, 5f
1747         bltu    yl, xl, 4b
1748 5:      movi    a2, 0           /* x >= y */
1749         leaf_return
1750
1751 .Llt_diff_signs:
1752         bgez    xh, 5b          /* x is non-negative and y is negative: x >= y */
1753
1754         /* x is negative and y is not, so x < y unless both are zero.  */
1755         or      a7, xh, yh
1756         slli    a7, a7, 1       /* shift out the sign bits */
1757         or      a7, a7, xl
1758         or      a7, a7, yl
1759         movi    a2, 0
1760         movi    a3, -1
1761         movnez  a2, a3, a7      /* a2 = -1 when any magnitude bit is set */
1762         leaf_return
1763
1764
1765         /* Unordered: a2 = 1 if either input is a NaN, 0 otherwise.  */
1766
1767         .align  4
1768         .global __unorddf2
1769         .type   __unorddf2, @function
1770 __unorddf2:
1771         leaf_entry sp, 16
1772         movi    a6, 0x7ff00000
1773         ball    xh, a6, 3f      /* x's exponent is all ones: Infinity or NaN */
1774 1:      ball    yh, a6, 4f      /* y's exponent is all ones: Infinity or NaN */
1775 2:      movi    a2, 0           /* neither input is a NaN */
1776         leaf_return
1777
1778 3:      slli    a7, xh, 12      /* mantissa bits of x, sign and exponent shifted out */
1779         or      a7, a7, xl
1780         beqz    a7, 1b          /* x is Infinity: go on to check y */
1781         movi    a2, 1           /* x is a NaN */
1782         leaf_return
1783
1784 4:      slli    a7, yh, 12      /* mantissa bits of y, sign and exponent shifted out */
1785         or      a7, a7, yl
1786         beqz    a7, 2b          /* y is Infinity: ordered */
1787         movi    a2, 1           /* y is a NaN */
1788         leaf_return
1789
1790 #endif /* L_cmpdf2 */
1791
1792 #ifdef L_fixdfsi
1793
1794         .align  4
1795         .global __fixdfsi
1796         .type   __fixdfsi, @function
1797 __fixdfsi:     /* convert the double in xh/xl to a signed 32-bit integer in a2; fraction bits are shifted out, out-of-range values saturate */
1798         leaf_entry sp, 16
1799
1800         /* Check for NaN and Infinity.  */
1801         movi    a6, 0x7ff00000
1802         ball    xh, a6, .Lfixdfsi_nan_or_inf
1803
1804         /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
1805         extui   a4, xh, 20, 11
1806         extui   a5, a6, 19, 10  /* 0x3fe */
1807         sub     a4, a4, a5
1808         bgei    a4, 32, .Lfixdfsi_maxint        /* too large for 32 bits: saturate */
1809         blti    a4, 1, .Lfixdfsi_zero   /* exponent below 0x3ff: result is 0 */
1810
1811         /* Add explicit "1.0" and shift << 11.  */
1812         or      a7, xh, a6      /* a7 also keeps the sign in bit 31 for the final select */
1813         ssai    (32 - 11)
1814         src     a5, a7, xl      /* a5 = mantissa with the explicit one at bit 31 */
1815
1816         /* Shift back to the right, based on the exponent.  */
1817         ssl     a4              /* shift by 32 - a4 */
1818         srl     a5, a5
1819
1820         /* Negate the result if sign != 0.  */
1821         neg     a2, a5
1822         movgez  a2, a5, a7      /* sign bit clear: keep the positive value */
1823         leaf_return
1824
1825 .Lfixdfsi_nan_or_inf:
1826         /* Handle Infinity and NaN.  */
1827         slli    a4, xh, 12      /* mantissa bits, sign and exponent shifted out */
1828         or      a4, a4, xl
1829         beqz    a4, .Lfixdfsi_maxint    /* Infinity: saturate according to its sign */
1830
1831         /* Translate NaN to +maxint.  */
1832         movi    xh, 0           /* makes the sign test below see a non-negative value */
1833
1834 .Lfixdfsi_maxint:
1835         slli    a4, a6, 11      /* 0x80000000 */
1836         addi    a5, a4, -1      /* 0x7fffffff */
1837         movgez  a4, a5, xh      /* non-negative input: 0x7fffffff; negative: 0x80000000 */
1838         mov     a2, a4
1839         leaf_return
1840
1841 .Lfixdfsi_zero:
1842         movi    a2, 0
1843         leaf_return
1844
1845 #endif /* L_fixdfsi */
1846
1847 #ifdef L_fixdfdi
1848
1849         .align  4
1850         .global __fixdfdi
1851         .type   __fixdfdi, @function
1852 __fixdfdi:     /* convert the double in xh/xl to a signed 64-bit integer left in xh/xl; fraction bits are shifted out, out-of-range values saturate */
1853         leaf_entry sp, 16
1854
1855         /* Check for NaN and Infinity.  */
1856         movi    a6, 0x7ff00000
1857         ball    xh, a6, .Lfixdfdi_nan_or_inf
1858
1859         /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
1860         extui   a4, xh, 20, 11
1861         extui   a5, a6, 19, 10  /* 0x3fe */
1862         sub     a4, a4, a5
1863         bgei    a4, 64, .Lfixdfdi_maxint        /* too large for 64 bits: saturate */
1864         blti    a4, 1, .Lfixdfdi_zero   /* exponent below 0x3ff: result is 0 */
1865
1866         /* Add explicit "1.0" and shift << 11.  */
1867         or      a7, xh, a6      /* a7 also keeps the sign in bit 31 for the negate test */
1868         ssai    (32 - 11)
1869         src     xh, a7, xl      /* xh:xl = mantissa with the explicit one at bit 63 */
1870         sll     xl, xl
1871
1872         /* Shift back to the right, based on the exponent.  */
1873         ssl     a4              /* shift by 64 - a4 */
1874         bgei    a4, 32, .Lfixdfdi_smallshift    /* result needs both words */
1875         srl     xl, xh          /* a4 < 32: the result fits in the low word */
1876         movi    xh, 0
1877
1878 .Lfixdfdi_shifted:
1879         /* Negate the result if sign != 0.  */
1880         bgez    a7, 1f
1881         neg     xl, xl
1882         neg     xh, xh
1883         beqz    xl, 1f          /* low word is zero: no borrow into the high word */
1884         addi    xh, xh, -1      /* borrow for the 64-bit negation */
1885 1:      leaf_return
1886
1887 .Lfixdfdi_smallshift:
1888         src     xl, xh, xl      /* 64-bit right shift of xh:xl */
1889         srl     xh, xh
1890         j       .Lfixdfdi_shifted
1891
1892 .Lfixdfdi_nan_or_inf:
1893         /* Handle Infinity and NaN.  */
1894         slli    a4, xh, 12      /* mantissa bits, sign and exponent shifted out */
1895         or      a4, a4, xl
1896         beqz    a4, .Lfixdfdi_maxint    /* Infinity: saturate according to its sign */
1897
1898         /* Translate NaN to +maxint.  */
1899         movi    xh, 0           /* makes the sign test below see a non-negative value */
1900
1901 .Lfixdfdi_maxint:
1902         slli    a7, a6, 11      /* 0x80000000 */
1903         bgez    xh, 1f
1904         mov     xh, a7          /* negative input: 0x80000000 in the high word, 0 in the low word */
1905         movi    xl, 0
1906         leaf_return
1907
1908 1:      addi    xh, a7, -1      /* 0x7fffffff */
1909         movi    xl, -1          /* low word all ones */
1910         leaf_return
1911
1912 .Lfixdfdi_zero:
1913         movi    xh, 0
1914         movi    xl, 0
1915         leaf_return
1916
1917 #endif /* L_fixdfdi */
1918
1919 #ifdef L_fixunsdfsi
1920 /* __fixunsdfsi: convert the double in xh/xl to a 32-bit unsigned
        integer returned in a2, truncating the fraction.  Magnitudes
        below 1 give 0.  In-range negative inputs return the two's
        complement of the truncated magnitude.  Out-of-range values and
        Infinity give 0xffffffff when positive and 0x80000000 when
        negative; NaN gives 0xffffffff.  */
1921         .align  4
1922         .global __fixunsdfsi
1923         .type   __fixunsdfsi, @function
1924 __fixunsdfsi:
1925         leaf_entry sp, 16
1926
1927         /* Check for NaN and Infinity.  */
1928         movi    a6, 0x7ff00000          /* exponent field mask */
1929         ball    xh, a6, .Lfixunsdfsi_nan_or_inf /* all exponent bits set */
1930
1931         /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
1932         extui   a4, xh, 20, 11          /* a4 = biased exponent */
1933         extui   a5, a6, 20, 10  /* 0x3ff */
1934         sub     a4, a4, a5              /* a4 = unbiased exponent */
1935         bgei    a4, 32, .Lfixunsdfsi_maxint     /* does not fit: saturate */
1936         bltz    a4, .Lfixunsdfsi_zero           /* magnitude < 1 */
1937
1938         /* Add explicit "1.0" and shift << 11.  */
1939         or      a7, xh, a6              /* a7 also keeps the sign in bit 31 */
1940         ssai    (32 - 11)
1941         src     a5, a7, xl              /* a5 = (a7 << 11) | (xl >> 21) */
1942
1943         /* Shift back to the right, based on the exponent.  */
1944         addi    a4, a4, 1               /* a4 = number of integer bits (1..32) */
1945         beqi    a4, 32, .Lfixunsdfsi_bigexp     /* all 32 bits used: no shift */
1946         ssl     a4              /* shift by 32 - a4 */
1947         srl     a5, a5
1948
1949         /* Negate the result if sign != 0.  */
1950         neg     a2, a5
1951         movgez  a2, a5, a7              /* sign clear: keep the unnegated value */
1952         leaf_return
1953
1954 .Lfixunsdfsi_nan_or_inf:
1955         /* Handle Infinity and NaN.  */
1956         slli    a4, xh, 12              /* drop the sign and exponent bits */
1957         or      a4, a4, xl
1958         beqz    a4, .Lfixunsdfsi_maxint /* mantissa == 0: Infinity */
1959
1960         /* Translate NaN to 0xffffffff.  */
1961         movi    a2, -1
1962         leaf_return
1963
1964 .Lfixunsdfsi_maxint:
1965         slli    a4, a6, 11      /* 0x80000000 */
1966         movi    a5, -1          /* 0xffffffff */
1967         movgez  a4, a5, xh              /* positive input: pick 0xffffffff */
1968         mov     a2, a4
1969         leaf_return
1970
1971 .Lfixunsdfsi_zero:
1972         movi    a2, 0
1973         leaf_return
1974
1975 .Lfixunsdfsi_bigexp:
1976         /* Handle unsigned maximum exponent case.  */
1977         bltz    xh, 1f                  /* xh is still the unmodified input here */
1978         mov     a2, a5          /* no shift needed */
1979         leaf_return
1980
1981         /* Return 0x80000000 if negative.  */
1982 1:      slli    a2, a6, 11
1983         leaf_return
1984
1985 #endif /* L_fixunsdfsi */
1986
1987 #ifdef L_fixunsdfdi
1988 /* __fixunsdfdi: convert the double in xh/xl to a 64-bit unsigned
        integer returned in xh/xl, truncating the fraction.  Magnitudes
        below 1 give 0.  In-range negative inputs return the two's
        complement of the truncated magnitude.  Out-of-range values and
        Infinity give all ones when positive and 0x8000000000000000 when
        negative; NaN gives all ones.  */
1989         .align  4
1990         .global __fixunsdfdi
1991         .type   __fixunsdfdi, @function
1992 __fixunsdfdi:
1993         leaf_entry sp, 16
1994
1995         /* Check for NaN and Infinity.  */
1996         movi    a6, 0x7ff00000          /* exponent field mask */
1997         ball    xh, a6, .Lfixunsdfdi_nan_or_inf /* all exponent bits set */
1998
1999         /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
2000         extui   a4, xh, 20, 11          /* a4 = biased exponent */
2001         extui   a5, a6, 20, 10  /* 0x3ff */
2002         sub     a4, a4, a5              /* a4 = unbiased exponent */
2003         bgei    a4, 64, .Lfixunsdfdi_maxint     /* does not fit: saturate */
2004         bltz    a4, .Lfixunsdfdi_zero           /* magnitude < 1 */
2005
2006         /* Add explicit "1.0" and shift << 11.  */
2007         or      a7, xh, a6              /* a7 also keeps the sign in bit 31 */
2008         ssai    (32 - 11)
2009         src     xh, a7, xl              /* xh = (a7 << 11) | (xl >> 21) */
2010         sll     xl, xl                  /* xl = xl << 11 */
2011
2012         /* Shift back to the right, based on the exponent.  */
2013         addi    a4, a4, 1               /* a4 = number of integer bits (1..64) */
2014         beqi    a4, 64, .Lfixunsdfdi_bigexp     /* all 64 bits used: no shift */
2015         ssl     a4              /* shift by 64 - a4 */
2016         bgei    a4, 32, .Lfixunsdfdi_smallshift /* shift count <= 32 */
2017         srl     xl, xh                  /* count > 32: low word comes from xh */
2018         movi    xh, 0                   /* ...and the high word is empty */
2019
2020 .Lfixunsdfdi_shifted:
2021         /* Negate the result if sign != 0.  */
2022         bgez    a7, 1f                  /* sign bit clear: done */
2023         neg     xl, xl
2024         neg     xh, xh
2025         beqz    xl, 1f                  /* low word zero: no borrow */
2026         addi    xh, xh, -1              /* propagate the borrow into xh */
2027 1:      leaf_return
2028
2029 .Lfixunsdfdi_smallshift:
2030         src     xl, xh, xl              /* xl = low word of (xh:xl) >> (64 - a4) */
2031         srl     xh, xh                  /* xh = xh >> (64 - a4) */
2032         j       .Lfixunsdfdi_shifted
2033
2034 .Lfixunsdfdi_nan_or_inf:
2035         /* Handle Infinity and NaN.  */
2036         slli    a4, xh, 12              /* drop the sign and exponent bits */
2037         or      a4, a4, xl
2038         beqz    a4, .Lfixunsdfdi_maxint /* mantissa == 0: Infinity */
2039
2040         /* Translate NaN to 0xffffffff.... */
2041 1:      movi    xh, -1
2042         movi    xl, -1
2043         leaf_return
2044
2045 .Lfixunsdfdi_maxint:
2046         bgez    xh, 1b                  /* positive: all ones (label 1 above) */
2047 2:      slli    xh, a6, 11      /* 0x80000000 */
2048         movi    xl, 0
2049         leaf_return
2050
2051 .Lfixunsdfdi_zero:
2052         movi    xh, 0
2053         movi    xl, 0
2054         leaf_return
2055
2056 .Lfixunsdfdi_bigexp:
2057         /* Handle unsigned maximum exponent case.  */
2058         bltz    a7, 2b                  /* xh was overwritten; a7 still has the sign */
2059         leaf_return             /* no shift needed */
2060
2061 #endif /* L_fixunsdfdi */
2062
2063 #ifdef L_floatsidf
2064 /* __floatunsidf / __floatsidf: convert the 32-bit unsigned or signed
        integer in a2 to a double returned in xh/xl.  Both entry points
        share the code from .Lfloatsidf_normalize on, with the sign
        (0 or 1) passed in a7.  All 32 input bits are kept across xh/xl,
        so there is no rounding step.  */
2065         .align  4
2066         .global __floatunsidf
2067         .type   __floatunsidf, @function
2068 __floatunsidf:
2069         leaf_entry sp, 16
2070         beqz    a2, .Lfloatsidf_return_zero
2071
2072         /* Set the sign to zero and jump to the floatsidf code.  */
2073         movi    a7, 0
2074         j       .Lfloatsidf_normalize
2075
2076         .align  4
2077         .global __floatsidf
2078         .type   __floatsidf, @function
2079 __floatsidf:
2080         leaf_entry sp, 16
2081
2082         /* Check for zero.  */
2083         beqz    a2, .Lfloatsidf_return_zero
2084
2085         /* Save the sign.  */
2086         extui   a7, a2, 31, 1           /* a7 = 1 if negative, else 0 */
2087
2088         /* Get the absolute value.  */
2089 #if XCHAL_HAVE_ABS
2090         abs     a2, a2
2091 #else
2092         neg     a4, a2
2093         movltz  a2, a4, a2              /* negative input: replace with its negation */
2094 #endif
2095
2096 .Lfloatsidf_normalize:
2097         /* Normalize with the first 1 bit in the msb.  do_nsau is a macro
                defined outside this section; from its use here it is
                presumed to leave the leading-zero count of a2 in a4 and to
                use a5/a6 as scratch -- confirm against its definition.  */
2098         do_nsau a4, a2, a5, a6
2099         ssl     a4
2100         sll     a5, a2                  /* a5 = a2 << a4 */
2101
2102         /* Shift the mantissa into position.  */
2103         srli    xh, a5, 11              /* leading 1 lands on bit 20 of xh */
2104         slli    xl, a5, (32 - 11)       /* low 11 bits go to the top of xl */
2105
2106         /* Set the exponent.  The leading 1 at bit 20 of xh carries into
                the exponent field when added, which is why the base below
                is 0x3fe and not 0x3ff.  */
2107         movi    a5, 0x41d       /* 0x3fe + 31 */
2108         sub     a5, a5, a4
2109         slli    a5, a5, 20
2110         add     xh, xh, a5
2111
2112         /* Add the sign and return. */
2113         slli    a7, a7, 31
2114         or      xh, xh, a7
2115         leaf_return
2116
2117 .Lfloatsidf_return_zero:
2118         movi    a3, 0                   /* a2 is already 0 here, so both words are 0 */
2119         leaf_return
2120
2121 #endif /* L_floatsidf */
2122
2123 #ifdef L_floatdidf
2124 /* __floatundidf / __floatdidf: convert the 64-bit unsigned or signed
        integer in xh/xl to a double returned in xh/xl.  Both entry
        points share the code from .Lfloatdidf_normalize on, with the
        sign (0 or 1) passed in a7.  The 11 bits that do not fit in the
        mantissa are rounded to nearest, with ties going to the even
        value.  */
2125         .align  4
2126         .global __floatundidf
2127         .type   __floatundidf, @function
2128 __floatundidf:
2129         leaf_entry sp, 16
2130
2131         /* Check for zero.  */
2132         or      a4, xh, xl
2133         beqz    a4, 2f                  /* zero input: return xh/xl unchanged */
2134
2135         /* Set the sign to zero and jump to the floatdidf code.  */
2136         movi    a7, 0
2137         j       .Lfloatdidf_normalize
2138
2139         .align  4
2140         .global __floatdidf
2141         .type   __floatdidf, @function
2142 __floatdidf:
2143         leaf_entry sp, 16
2144
2145         /* Check for zero.  */
2146         or      a4, xh, xl
2147         beqz    a4, 2f                  /* zero input: return xh/xl unchanged */
2148
2149         /* Save the sign.  */
2150         extui   a7, xh, 31, 1           /* a7 = 1 if negative, else 0 */
2151
2152         /* Get the absolute value.  */
2153         bgez    xh, .Lfloatdidf_normalize
2154         neg     xl, xl
2155         neg     xh, xh
2156         beqz    xl, .Lfloatdidf_normalize       /* low word zero: no borrow */
2157         addi    xh, xh, -1              /* propagate the borrow into xh */
2158
2159 .Lfloatdidf_normalize:
2160         /* Normalize with the first 1 bit in the msb of xh.  do_nsau is a
                macro defined outside this section; from its use here it is
                presumed to leave the leading-zero count of its second
                operand in a4, using a5/a6 as scratch -- confirm against its
                definition.  */
2161         beqz    xh, .Lfloatdidf_bigshift
2162         do_nsau a4, xh, a5, a6
2163         ssl     a4
2164         src     xh, xh, xl              /* xh = high word of (xh:xl) << a4 */
2165         sll     xl, xl                  /* xl = xl << a4 */
2166
2167 .Lfloatdidf_shifted:
2168         /* Shift the mantissa into position, with rounding bits in a6.  */
2169         ssai    11
2170         sll     a6, xl                  /* a6 = xl << 21: the 11 bits about to drop */
2171         src     xl, xh, xl              /* xl = low word of (xh:xl) >> 11 */
2172         srl     xh, xh                  /* leading 1 lands on bit 20 of xh */
2173
2174         /* Set the exponent.  The leading 1 at bit 20 of xh carries into
                the exponent field when added, which is why the base below
                is 0x3fe and not 0x3ff.  a4 is the total left-shift count
                used for normalization.  */
2175         movi    a5, 0x43d       /* 0x3fe + 63 */
2176         sub     a5, a5, a4
2177         slli    a5, a5, 20
2178         add     xh, xh, a5
2179
2180         /* Add the sign.  */
2181         slli    a7, a7, 31
2182         or      xh, xh, a7
2183
2184         /* Round up if the leftover fraction is >= 1/2.  */
2185         bgez    a6, 2f                  /* top dropped bit clear: fraction < 1/2 */
2186         addi    xl, xl, 1
2187         beqz    xl, .Lfloatdidf_roundcarry      /* xl wrapped: carry into xh */
2188
2189         /* Check if the leftover fraction is exactly 1/2.  */
2190         slli    a6, a6, 1               /* discard the 1/2 bit; zero left means a tie */
2191         beqz    a6, .Lfloatdidf_exactlyhalf
2192 2:      leaf_return
2193
2194 .Lfloatdidf_bigshift:
2195         /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
2196         do_nsau a4, xl, a5, a6
2197         ssl     a4
2198         sll     xh, xl                  /* xh = xl << a4 */
2199         movi    xl, 0
2200         addi    a4, a4, 32              /* count the 32-bit move from xl to xh */
2201         j       .Lfloatdidf_shifted
2202
2203 .Lfloatdidf_exactlyhalf:
2204         /* Round down to the nearest even value.  */
2205         srli    xl, xl, 1               /* this shift pair clears bit 0 of xl */
2206         slli    xl, xl, 1
2207         leaf_return
2208
2209 .Lfloatdidf_roundcarry:
2210         /* xl is always zero when the rounding increment overflows, so
2211            there's no need to round it to an even value.  */
2212         addi    xh, xh, 1
2213         /* Overflow to the exponent is OK.  */
2214         leaf_return
2215
2216 #endif /* L_floatdidf */
2217
2218 #ifdef L_truncdfsf2
2219 /* __truncdfsf2: convert the double in xh/xl to a single-precision
        value returned in a2.  The mantissa bits that do not fit are
        rounded to nearest, with ties going to the even value.
        Exponents too large for single precision, and Infinity, give a
        signed Infinity; NaN gives a signed quiet NaN.  Exponents too
        small give a subnormal result, or a signed zero when the
        required shift is 32 bits or more.  */
2220         .align  4
2221         .global __truncdfsf2
2222         .type   __truncdfsf2, @function
2223 __truncdfsf2:
2224         leaf_entry sp, 16
2225
2226         /* Adjust the exponent bias.  */
2227         movi    a4, (0x3ff - 0x7f) << 20
2228         sub     a5, xh, a4              /* a5 = xh with the exponent rebiased */
2229
2230         /* Check for underflow.  */
2231         xor     a6, xh, a5
2232         bltz    a6, .Ltrunc_underflow   /* sign bit changed: subtraction borrowed */
2233         extui   a6, a5, 20, 11          /* a6 = rebiased exponent */
2234         beqz    a6, .Ltrunc_underflow
2235
2236         /* Check for overflow.  */
2237         movi    a4, 255
2238         bge     a6, a4, .Ltrunc_overflow
2239
2240         /* Shift a5/xl << 3 into a5/a4.  */
2241         ssai    (32 - 3)
2242         src     a5, a5, xl              /* sign and top 3 exponent bits shift out */
2243         sll     a4, xl                  /* a4 = xl << 3: bits left over for rounding */
2244
2245 .Ltrunc_addsign:
2246         /* Add the sign bit.  */
2247         extui   a6, xh, 31, 1
2248         slli    a6, a6, 31
2249         or      a2, a6, a5
2250
2251         /* Round up if the leftover fraction is >= 1/2.  */
2252         bgez    a4, 1f                  /* top leftover bit clear: fraction < 1/2 */
2253         addi    a2, a2, 1
2254         /* Overflow to the exponent is OK.  The answer will be correct.  */
2255
2256         /* Check if the leftover fraction is exactly 1/2.  */
2257         slli    a4, a4, 1               /* discard the 1/2 bit; zero left means a tie */
2258         beqz    a4, .Ltrunc_exactlyhalf
2259 1:      leaf_return
2260
2261 .Ltrunc_exactlyhalf:
2262         /* Round down to the nearest even value.  */
2263         srli    a2, a2, 1               /* this shift pair clears bit 0 of a2 */
2264         slli    a2, a2, 1
2265         leaf_return
2266
2267 .Ltrunc_overflow:
2268         /* Check if exponent == 0x7ff.  */
2269         movi    a4, 0x7ff00000
2270         bnall   xh, a4, 1f              /* finite but too large: return Infinity */
2271
2272         /* Check if mantissa is nonzero.  */
2273         slli    a5, xh, 12              /* drop the sign and exponent bits */
2274         or      a5, a5, xl
2275         beqz    a5, 1f                  /* mantissa == 0: Infinity */
2276
2277         /* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
2278         srli    a4, a4, 1
2279
2280 1:      slli    a4, a4, 4       /* 0xff000000 or 0xff800000 */
2281         /* Add the sign bit.  */
2282         extui   a6, xh, 31, 1
2283         ssai    1
2284         src     a2, a6, a4              /* a2 = (sign << 31) | (a4 >> 1) */
2285         leaf_return
2286
2287 .Ltrunc_underflow:
2288         /* Find shift count for a subnormal.  Flush to zero if >= 32.  */
2289         extui   a6, xh, 20, 11          /* a6 = original biased exponent */
2290         movi    a5, 0x3ff - 0x7f
2291         sub     a6, a5, a6
2292         addi    a6, a6, 1               /* a6 = right-shift count */
2293         bgeui   a6, 32, 1f
2294
2295         /* Replace the exponent with an explicit "1.0".  The slli/srli
                pair below clears bits 21-31, leaving only bit 20 of the
                0x700000 constant above the mantissa.  */
2296         slli    a5, a5, 13      /* 0x700000 */
2297         or      a5, a5, xh
2298         slli    a5, a5, 11
2299         srli    a5, a5, 11
2300
2301         /* Shift the mantissa left by 3 bits (into a5/a4).  */
2302         ssai    (32 - 3)
2303         src     a5, a5, xl
2304         sll     a4, xl
2305
2306         /* Shift right by a6.  */
2307         ssr     a6
2308         sll     a7, a4                  /* a7 = the bits of a4 that fall off the bottom */
2309         src     a4, a5, a4
2310         srl     a5, a5
2311         beqz    a7, .Ltrunc_addsign     /* nothing lost: a4 is exact */
2312         or      a4, a4, a6      /* any positive, nonzero value will work */
2313         j       .Ltrunc_addsign
2314
2315         /* Return +/- zero.  */
2316 1:      extui   a2, xh, 31, 1
2317         slli    a2, a2, 31
2318         leaf_return
2319
2320 #endif /* L_truncdfsf2 */
2321
2322 #ifdef L_extendsfdf2
2323 /* __extendsfdf2: convert the single-precision value in a2 to a
        double returned in xh/xl.  Normal numbers are rebiased and their
        mantissa moved into place; subnormal inputs are normalized
        first.  Zero and Infinity keep their sign.  A NaN input gives a
        signed NaN with only the top mantissa bit set (the input's
        mantissa bits are not copied).  */
2324         .align  4
2325         .global __extendsfdf2
2326         .type   __extendsfdf2, @function
2327 __extendsfdf2:
2328         leaf_entry sp, 16
2329
2330         /* Save the sign bit and then shift it off.  */
2331         extui   a5, a2, 31, 1
2332         slli    a5, a5, 31              /* a5 = sign bit in position 31 */
2333         slli    a4, a2, 1               /* a4 = input << 1, sign removed */
2334
2335         /* Extract and check the exponent.  */
2336         extui   a6, a2, 23, 8           /* a6 = biased exponent */
2337         beqz    a6, .Lextend_expzero
2338         addi    a6, a6, 1
2339         beqi    a6, 256, .Lextend_nan_or_inf    /* exponent was 0xff */
2340
2341         /* Shift >> 3 into a4/xl.  */
2342         srli    a4, a4, 4               /* undo the << 1 above, then >> 3 */
2343         slli    xl, a2, (32 - 3)        /* low 3 mantissa bits go to the top of xl */
2344
2345         /* Adjust the exponent bias.  */
2346         movi    a6, (0x3ff - 0x7f) << 20
2347         add     a4, a4, a6
2348
2349         /* Add the sign bit.  */
2350         or      xh, a4, a5
2351         leaf_return
2352
2353 .Lextend_nan_or_inf:
2354         movi    a4, 0x7ff00000          /* all exponent bits set, mantissa 0 */
2355
2356         /* Check for NaN.  */
2357         slli    a7, a2, 9               /* a7 = mantissa bits only */
2358         beqz    a7, 1f                  /* mantissa == 0: Infinity */
2359
2360         slli    a6, a6, 11      /* 0x80000 */
2361         or      a4, a4, a6              /* set the top mantissa bit */
2362
2363         /* Add the sign and return.  */
2364 1:      or      xh, a4, a5
2365         movi    xl, 0
2366         leaf_return
2367
2368 .Lextend_expzero:
2369         beqz    a4, 1b                  /* zero input: a4 == 0, so 1b stores only the sign */
2370
2371         /* Normalize it to have 8 zero bits before the first 1 bit.
                do_nsau is a macro defined outside this section; from its
                use here it is presumed to leave the leading-zero count of
                a4 in a7 and to use a2/a3 as scratch -- confirm against its
                definition.  */
2372         do_nsau a7, a4, a2, a3
2373         addi    a7, a7, -8              /* a7 = left shift that puts the first 1 at bit 23 */
2374         ssl     a7
2375         sll     a4, a4
2376
2377         /* Shift >> 3 into a4/xl.  */
2378         slli    xl, a4, (32 - 3)
2379         srli    a4, a4, 3               /* leading 1 lands on bit 20 of a4 */
2380
2381         /* Set the exponent.  The leading 1 at bit 20 of a4 carries into
                the exponent field when added, which is why the base below
                is 0x3fe and not 0x3ff.  */
2382         movi    a6, 0x3fe - 0x7f
2383         sub     a6, a6, a7
2384         slli    a6, a6, 20
2385         add     a4, a4, a6
2386
2387         /* Add the sign and return.  */
2388         or      xh, a4, a5
2389         leaf_return
2390
2391 #endif /* L_extendsfdf2 */
2392
2393