libgcc/config/xtensa/ieee754-df.S

   1 /* IEEE-754 double-precision functions for Xtensa
   2    Copyright (C) 2006-2018 Free Software Foundation, Inc.
   3    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but WITHOUT
  13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15    License for more details.
  16
  17    Under Section 7 of GPL version 3, you are granted additional
  18    permissions described in the GCC Runtime Library Exception, version
  19    3.1, as published by the Free Software Foundation.
  20
  21    You should have received a copy of the GNU General Public License and
  22    a copy of the GCC Runtime Library Exception along with this program;
  23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  24    <http://www.gnu.org/licenses/>.  */
  25
  26 #ifdef __XTENSA_EB__
  27 #define xh a2
  28 #define xl a3
  29 #define yh a4
  30 #define yl a5
  31 #else
  32 #define xh a3
  33 #define xl a2
  34 #define yh a5
  35 #define yl a4
  36 #endif
  37
  38 /*  Warning!  The branch displacements for some Xtensa branch instructions
  39     are quite small, and this code has been carefully laid out to keep
  40     branch targets in range.  If you change anything, be sure to check that
  41     the assembler is not relaxing anything to branch over a jump.  */
  42
  43 #ifdef L_negdf2
  44
  45         .align  4
  46         .global __negdf2
  47         .type   __negdf2, @function
  48 __negdf2:
  49         leaf_entry sp, 16
  50         movi    a4, 0x80000000
  51         xor     xh, xh, a4
  52         leaf_return
  53
  54 #endif /* L_negdf2 */
  55
  56 #ifdef L_addsubdf3
  57
  58         /* Addition */
  59 __adddf3_aux:
  60
  61         /* Handle NaNs and Infinities.  (This code is placed before the
  62            start of the function just to keep it in range of the limited
  63            branch displacements.)  */
  64
  65 .Ladd_xnan_or_inf:
  66         /* If y is neither Infinity nor NaN, return x.  */
  67         bnall   yh, a6, .Ladd_return_nan_or_inf
  68         /* If x is a NaN, return it.  Otherwise, return y.  */
  69         slli    a7, xh, 12
  70         or      a7, a7, xl
  71         bnez    a7, .Ladd_return_nan
  72
  73 .Ladd_ynan_or_inf:
  74         /* Return y.  */
  75         mov     xh, yh
  76         mov     xl, yl
  77
  78 .Ladd_return_nan_or_inf:
  79         slli    a7, xh, 12
  80         or      a7, a7, xl
  81         bnez    a7, .Ladd_return_nan
  82         leaf_return
  83
  84 .Ladd_return_nan:
  85         movi    a4, 0x80000     /* make it a quiet NaN */
  86         or      xh, xh, a4
  87         leaf_return
  88
  89 .Ladd_opposite_signs:
  90         /* Operand signs differ.  Do a subtraction.  */
  91         slli    a7, a6, 11
  92         xor     yh, yh, a7
  93         j       .Lsub_same_sign
  94
  95         .align  4
  96         .global __adddf3
  97         .type   __adddf3, @function
  98 __adddf3:
  99         leaf_entry sp, 16
 100         movi    a6, 0x7ff00000
 101
 102         /* Check if the two operands have the same sign.  */
 103         xor     a7, xh, yh
 104         bltz    a7, .Ladd_opposite_signs
 105
 106 .Ladd_same_sign:
 107         /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
 108         ball    xh, a6, .Ladd_xnan_or_inf
 109         ball    yh, a6, .Ladd_ynan_or_inf
 110
 111         /* Compare the exponents.  The smaller operand will be shifted
 112            right by the exponent difference and added to the larger
 113            one.  */
 114         extui   a7, xh, 20, 12
 115         extui   a8, yh, 20, 12
 116         bltu    a7, a8, .Ladd_shiftx
 117
 118 .Ladd_shifty:
 119         /* Check if the smaller (or equal) exponent is zero.  */
 120         bnone   yh, a6, .Ladd_yexpzero
 121
 122         /* Replace yh sign/exponent with 0x001.  */
 123         or      yh, yh, a6
 124         slli    yh, yh, 11
 125         srli    yh, yh, 11
 126
 127 .Ladd_yexpdiff:
 128         /* Compute the exponent difference.  Optimize for difference < 32.  */
 129         sub     a10, a7, a8
 130         bgeui   a10, 32, .Ladd_bigshifty
 131
 132         /* Shift yh/yl right by the exponent difference.  Any bits that are
 133            shifted out of yl are saved in a9 for rounding the result.  */
 134         ssr     a10
 135         movi    a9, 0
 136         src     a9, yl, a9
 137         src     yl, yh, yl
 138         srl     yh, yh
 139
 140 .Ladd_addy:
 141         /* Do the 64-bit addition.  */
 142         add     xl, xl, yl
 143         add     xh, xh, yh
 144         bgeu    xl, yl, 1f
 145         addi    xh, xh, 1
 146 1:
 147         /* Check if the add overflowed into the exponent.  */
 148         extui   a10, xh, 20, 12
 149         beq     a10, a7, .Ladd_round
 150         mov     a8, a7
 151         j       .Ladd_carry
 152
 153 .Ladd_yexpzero:
 154         /* y is a subnormal value.  Replace its sign/exponent with zero,
 155            i.e., no implicit "1.0", and increment the apparent exponent
 156            because subnormals behave as if they had the minimum (nonzero)
 157            exponent.  Test for the case when both exponents are zero.  */
 158         slli    yh, yh, 12
 159         srli    yh, yh, 12
 160         bnone   xh, a6, .Ladd_bothexpzero
 161         addi    a8, a8, 1
 162         j       .Ladd_yexpdiff
 163
 164 .Ladd_bothexpzero:
 165         /* Both exponents are zero.  Handle this as a special case.  There
 166            is no need to shift or round, and the normal code for handling
 167            a carry into the exponent field will not work because it
 168            assumes there is an implicit "1.0" that needs to be added.  */
 169         add     xl, xl, yl
 170         add     xh, xh, yh
 171         bgeu    xl, yl, 1f
 172         addi    xh, xh, 1
 173 1:      leaf_return
 174
 175 .Ladd_bigshifty:
 176         /* Exponent difference > 64 -- just return the bigger value.  */
 177         bgeui   a10, 64, 1b
 178
 179         /* Shift yh/yl right by the exponent difference.  Any bits that are
 180            shifted out are saved in a9 for rounding the result.  */
 181         ssr     a10
 182         sll     a11, yl         /* lost bits shifted out of yl */
 183         src     a9, yh, yl
 184         srl     yl, yh
 185         movi    yh, 0
 186         beqz    a11, .Ladd_addy
 187         or      a9, a9, a10     /* any positive, nonzero value will work */
 188         j       .Ladd_addy
 189
 190 .Ladd_xexpzero:
 191         /* Same as "yexpzero" except skip handling the case when both
 192            exponents are zero.  */
 193         slli    xh, xh, 12
 194         srli    xh, xh, 12
 195         addi    a7, a7, 1
 196         j       .Ladd_xexpdiff
 197
 198 .Ladd_shiftx:
 199         /* Same thing as the "shifty" code, but with x and y swapped.  Also,
 200            because the exponent difference is always nonzero in this version,
 201            the shift sequence can use SLL and skip loading a constant zero.  */
 202         bnone   xh, a6, .Ladd_xexpzero
 203
 204         or      xh, xh, a6
 205         slli    xh, xh, 11
 206         srli    xh, xh, 11
 207
 208 .Ladd_xexpdiff:
 209         sub     a10, a8, a7
 210         bgeui   a10, 32, .Ladd_bigshiftx
 211
 212         ssr     a10
 213         sll     a9, xl
 214         src     xl, xh, xl
 215         srl     xh, xh
 216
 217 .Ladd_addx:
 218         add     xl, xl, yl
 219         add     xh, xh, yh
 220         bgeu    xl, yl, 1f
 221         addi    xh, xh, 1
 222 1:
 223         /* Check if the add overflowed into the exponent.  */
 224         extui   a10, xh, 20, 12
 225         bne     a10, a8, .Ladd_carry
 226
 227 .Ladd_round:
 228         /* Round up if the leftover fraction is >= 1/2.  */
 229         bgez    a9, 1f
 230         addi    xl, xl, 1
 231         beqz    xl, .Ladd_roundcarry
 232
 233         /* Check if the leftover fraction is exactly 1/2.  */
 234         slli    a9, a9, 1
 235         beqz    a9, .Ladd_exactlyhalf
 236 1:      leaf_return
 237
 238 .Ladd_bigshiftx:
 239         /* Mostly the same thing as "bigshifty"....  */
 240         bgeui   a10, 64, .Ladd_returny
 241
 242         ssr     a10
 243         sll     a11, xl
 244         src     a9, xh, xl
 245         srl     xl, xh
 246         movi    xh, 0
 247         beqz    a11, .Ladd_addx
 248         or      a9, a9, a10
 249         j       .Ladd_addx
 250
 251 .Ladd_returny:
 252         mov     xh, yh
 253         mov     xl, yl
 254         leaf_return
 255
 256 .Ladd_carry:
 257         /* The addition has overflowed into the exponent field, so the
 258            value needs to be renormalized.  The mantissa of the result
 259            can be recovered by subtracting the original exponent and
 260            adding 0x100000 (which is the explicit "1.0" for the
 261            mantissa of the non-shifted operand -- the "1.0" for the
 262            shifted operand was already added).  The mantissa can then
 263            be shifted right by one bit.  The explicit "1.0" of the
 264            shifted mantissa then needs to be replaced by the exponent,
 265            incremented by one to account for the normalizing shift.
 266            It is faster to combine these operations: do the shift first
 267            and combine the additions and subtractions.  If x is the
 268            original exponent, the result is:
 269                shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
 270            or:
 271                shifted mantissa + ((x + 1) << 19)
 272            Note that the exponent is incremented here by leaving the
 273            explicit "1.0" of the mantissa in the exponent field.  */
 274
 275         /* Shift xh/xl right by one bit.  Save the lsb of xl.  */
 276         mov     a10, xl
 277         ssai    1
 278         src     xl, xh, xl
 279         srl     xh, xh
 280
 281         /* See explanation above.  The original exponent is in a8.  */
 282         addi    a8, a8, 1
 283         slli    a8, a8, 19
 284         add     xh, xh, a8
 285
 286         /* Return an Infinity if the exponent overflowed.  */
 287         ball    xh, a6, .Ladd_infinity
 288
 289         /* Same thing as the "round" code except the msb of the leftover
 290            fraction is bit 0 of a10, with the rest of the fraction in a9.  */
 291         bbci.l  a10, 0, 1f
 292         addi    xl, xl, 1
 293         beqz    xl, .Ladd_roundcarry
 294         beqz    a9, .Ladd_exactlyhalf
 295 1:      leaf_return
 296
 297 .Ladd_infinity:
 298         /* Clear the mantissa.  */
 299         movi    xl, 0
 300         srli    xh, xh, 20
 301         slli    xh, xh, 20
 302
 303         /* The sign bit may have been lost in a carry-out.  Put it back.  */
 304         slli    a8, a8, 1
 305         or      xh, xh, a8
 306         leaf_return
 307
 308 .Ladd_exactlyhalf:
 309         /* Round down to the nearest even value.  */
 310         srli    xl, xl, 1
 311         slli    xl, xl, 1
 312         leaf_return
 313
 314 .Ladd_roundcarry:
 315         /* xl is always zero when the rounding increment overflows, so
 316            there's no need to round it to an even value.  */
 317         addi    xh, xh, 1
 318         /* Overflow to the exponent is OK.  */
 319         leaf_return
 320
 321
 322         /* Subtraction */
 323 __subdf3_aux:
 324
 325         /* Handle NaNs and Infinities.  (This code is placed before the
 326            start of the function just to keep it in range of the limited
 327            branch displacements.)  */
 328
 329 .Lsub_xnan_or_inf:
 330         /* If y is neither Infinity nor NaN, return x.  */
 331         bnall   yh, a6, .Lsub_return_nan_or_inf
 332
 333 .Lsub_return_nan:
 334         /* Both x and y are either NaN or Inf, so the result is NaN.  */
 335         movi    a4, 0x80000     /* make it a quiet NaN */
 336         or      xh, xh, a4
 337         leaf_return
 338
 339 .Lsub_ynan_or_inf:
 340         /* Negate y and return it.  */
 341         slli    a7, a6, 11
 342         xor     xh, yh, a7
 343         mov     xl, yl
 344
 345 .Lsub_return_nan_or_inf:
 346         slli    a7, xh, 12
 347         or      a7, a7, xl
 348         bnez    a7, .Lsub_return_nan
 349         leaf_return
 350
 351 .Lsub_opposite_signs:
 352         /* Operand signs differ.  Do an addition.  */
 353         slli    a7, a6, 11
 354         xor     yh, yh, a7
 355         j       .Ladd_same_sign
 356
 357         .align  4
 358         .global __subdf3
 359         .type   __subdf3, @function
 360 __subdf3:
 361         leaf_entry sp, 16
 362         movi    a6, 0x7ff00000
 363
 364         /* Check if the two operands have the same sign.  */
 365         xor     a7, xh, yh
 366         bltz    a7, .Lsub_opposite_signs
 367
 368 .Lsub_same_sign:
 369         /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
 370         ball    xh, a6, .Lsub_xnan_or_inf
 371         ball    yh, a6, .Lsub_ynan_or_inf
 372
 373         /* Compare the operands.  In contrast to addition, the entire
 374            value matters here.  */
 375         extui   a7, xh, 20, 11
 376         extui   a8, yh, 20, 11
 377         bltu    xh, yh, .Lsub_xsmaller
 378         beq     xh, yh, .Lsub_compare_low
 379
 380 .Lsub_ysmaller:
 381         /* Check if the smaller (or equal) exponent is zero.  */
 382         bnone   yh, a6, .Lsub_yexpzero
 383
 384         /* Replace yh sign/exponent with 0x001.  */
 385         or      yh, yh, a6
 386         slli    yh, yh, 11
 387         srli    yh, yh, 11
 388
 389 .Lsub_yexpdiff:
 390         /* Compute the exponent difference.  Optimize for difference < 32.  */
 391         sub     a10, a7, a8
 392         bgeui   a10, 32, .Lsub_bigshifty
 393
 394         /* Shift yh/yl right by the exponent difference.  Any bits that are
 395            shifted out of yl are saved in a9 for rounding the result.  */
 396         ssr     a10
 397         movi    a9, 0
 398         src     a9, yl, a9
 399         src     yl, yh, yl
 400         srl     yh, yh
 401
 402 .Lsub_suby:
 403         /* Do the 64-bit subtraction.  */
 404         sub     xh, xh, yh
 405         bgeu    xl, yl, 1f
 406         addi    xh, xh, -1
 407 1:      sub     xl, xl, yl
 408
 409         /* Subtract the leftover bits in a9 from zero and propagate any
 410            borrow from xh/xl.  */
 411         neg     a9, a9
 412         beqz    a9, 1f
 413         addi    a5, xh, -1
 414         moveqz  xh, a5, xl
 415         addi    xl, xl, -1
 416 1:
 417         /* Check if the subtract underflowed into the exponent.  */
 418         extui   a10, xh, 20, 11
 419         beq     a10, a7, .Lsub_round
 420         j       .Lsub_borrow
 421
 422 .Lsub_compare_low:
 423         /* The high words are equal.  Compare the low words.  */
 424         bltu    xl, yl, .Lsub_xsmaller
 425         bltu    yl, xl, .Lsub_ysmaller
 426         /* The operands are equal.  Return 0.0.  */
 427         movi    xh, 0
 428         movi    xl, 0
 429 1:      leaf_return
 430
 431 .Lsub_yexpzero:
 432         /* y is a subnormal value.  Replace its sign/exponent with zero,
 433            i.e., no implicit "1.0".  Unless x is also a subnormal, increment
 434            y's apparent exponent because subnormals behave as if they had
 435            the minimum (nonzero) exponent.  */
 436         slli    yh, yh, 12
 437         srli    yh, yh, 12
 438         bnone   xh, a6, .Lsub_yexpdiff
 439         addi    a8, a8, 1
 440         j       .Lsub_yexpdiff
 441
 442 .Lsub_bigshifty:
 443         /* Exponent difference > 64 -- just return the bigger value.  */
 444         bgeui   a10, 64, 1b
 445
 446         /* Shift yh/yl right by the exponent difference.  Any bits that are
 447            shifted out are saved in a9 for rounding the result.  */
 448         ssr     a10
 449         sll     a11, yl         /* lost bits shifted out of yl */
 450         src     a9, yh, yl
 451         srl     yl, yh
 452         movi    yh, 0
 453         beqz    a11, .Lsub_suby
 454         or      a9, a9, a10     /* any positive, nonzero value will work */
 455         j       .Lsub_suby
 456
 457 .Lsub_xsmaller:
 458         /* Same thing as the "ysmaller" code, but with x and y swapped and
 459            with y negated.  */
 460         bnone   xh, a6, .Lsub_xexpzero
 461
 462         or      xh, xh, a6
 463         slli    xh, xh, 11
 464         srli    xh, xh, 11
 465
 466 .Lsub_xexpdiff:
 467         sub     a10, a8, a7
 468         bgeui   a10, 32, .Lsub_bigshiftx
 469
 470         ssr     a10
 471         movi    a9, 0
 472         src     a9, xl, a9
 473         src     xl, xh, xl
 474         srl     xh, xh
 475
 476         /* Negate y.  */
 477         slli    a11, a6, 11
 478         xor     yh, yh, a11
 479
 480 .Lsub_subx:
 481         sub     xl, yl, xl
 482         sub     xh, yh, xh
 483         bgeu    yl, xl, 1f
 484         addi    xh, xh, -1
 485 1:
 486         /* Subtract the leftover bits in a9 from zero and propagate any
 487            borrow from xh/xl.  */
 488         neg     a9, a9
 489         beqz    a9, 1f
 490         addi    a5, xh, -1
 491         moveqz  xh, a5, xl
 492         addi    xl, xl, -1
 493 1:
 494         /* Check if the subtract underflowed into the exponent.  */
 495         extui   a10, xh, 20, 11
 496         bne     a10, a8, .Lsub_borrow
 497
 498 .Lsub_round:
 499         /* Round up if the leftover fraction is >= 1/2.  */
 500         bgez    a9, 1f
 501         addi    xl, xl, 1
 502         beqz    xl, .Lsub_roundcarry
 503
 504         /* Check if the leftover fraction is exactly 1/2.  */
 505         slli    a9, a9, 1
 506         beqz    a9, .Lsub_exactlyhalf
 507 1:      leaf_return
 508
 509 .Lsub_xexpzero:
 510         /* Same as "yexpzero".  */
 511         slli    xh, xh, 12
 512         srli    xh, xh, 12
 513         bnone   yh, a6, .Lsub_xexpdiff
 514         addi    a7, a7, 1
 515         j       .Lsub_xexpdiff
 516
 517 .Lsub_bigshiftx:
 518         /* Mostly the same thing as "bigshifty", but with the sign bit of the
 519            shifted value set so that the subsequent subtraction flips the
 520            sign of y.  */
 521         bgeui   a10, 64, .Lsub_returny
 522
 523         ssr     a10
 524         sll     a11, xl
 525         src     a9, xh, xl
 526         srl     xl, xh
 527         slli    xh, a6, 11      /* set sign bit of xh */
 528         beqz    a11, .Lsub_subx
 529         or      a9, a9, a10
 530         j       .Lsub_subx
 531
 532 .Lsub_returny:
 533         /* Negate and return y.  */
 534         slli    a7, a6, 11
 535         xor     xh, yh, a7
 536         mov     xl, yl
 537         leaf_return
 538
 539 .Lsub_borrow:
 540         /* The subtraction has underflowed into the exponent field, so the
 541            value needs to be renormalized.  Shift the mantissa left as
 542            needed to remove any leading zeros and adjust the exponent
 543            accordingly.  If the exponent is not large enough to remove
 544            all the leading zeros, the result will be a subnormal value.  */
 545
 546         slli    a8, xh, 12
 547         beqz    a8, .Lsub_xhzero
 548         do_nsau a6, a8, a7, a11
 549         srli    a8, a8, 12
 550         bge     a6, a10, .Lsub_subnormal
 551         addi    a6, a6, 1
 552
 553 .Lsub_shift_lt32:
 554         /* Shift the mantissa (a8/xl/a9) left by a6.  */
 555         ssl     a6
 556         src     a8, a8, xl
 557         src     xl, xl, a9
 558         sll     a9, a9
 559
 560         /* Combine the shifted mantissa with the sign and exponent,
 561            decrementing the exponent by a6.  (The exponent has already
 562            been decremented by one due to the borrow from the subtraction,
 563            but adding the mantissa will increment the exponent by one.)  */
 564         srli    xh, xh, 20
 565         sub     xh, xh, a6
 566         slli    xh, xh, 20
 567         add     xh, xh, a8
 568         j       .Lsub_round
 569
 570 .Lsub_exactlyhalf:
 571         /* Round down to the nearest even value.  */
 572         srli    xl, xl, 1
 573         slli    xl, xl, 1
 574         leaf_return
 575
 576 .Lsub_roundcarry:
 577         /* xl is always zero when the rounding increment overflows, so
 578            there's no need to round it to an even value.  */
 579         addi    xh, xh, 1
 580         /* Overflow to the exponent is OK.  */
 581         leaf_return
 582
 583 .Lsub_xhzero:
 584         /* When normalizing the result, all the mantissa bits in the high
 585            word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
 586         do_nsau a6, xl, a7, a11
 587         addi    a6, a6, 21
 588         blt     a10, a6, .Lsub_subnormal
 589
 590 .Lsub_normalize_shift:
 591         bltui   a6, 32, .Lsub_shift_lt32
 592
 593         ssl     a6
 594         src     a8, xl, a9
 595         sll     xl, a9
 596         movi    a9, 0
 597
 598         srli    xh, xh, 20
 599         sub     xh, xh, a6
 600         slli    xh, xh, 20
 601         add     xh, xh, a8
 602         j       .Lsub_round
 603
 604 .Lsub_subnormal:
 605         /* The exponent is too small to shift away all the leading zeros.
 606            Set a6 to the current exponent (which has already been
 607            decremented by the borrow) so that the exponent of the result
 608            will be zero.  Do not add 1 to a6 in this case, because: (1)
 609            adding the mantissa will not increment the exponent, so there is
 610            no need to subtract anything extra from the exponent to
 611            compensate, and (2) the effective exponent of a subnormal is 1
 612            not 0 so the shift amount must be 1 smaller than normal. */
 613         mov     a6, a10
 614         j       .Lsub_normalize_shift
 615
 616 #endif /* L_addsubdf3 */
 617
 618 #ifdef L_muldf3
 619
 620         /* Multiplication */
 621 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
 622 #define XCHAL_NO_MUL 1
 623 #endif
 624
 625         .literal_position
 626 __muldf3_aux:
 627
 628         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
 629            (This code is placed before the start of the function just to
 630            keep it in range of the limited branch displacements.)  */
 631
 632 .Lmul_xexpzero:
 633         /* Clear the sign bit of x.  */
 634         slli    xh, xh, 1
 635         srli    xh, xh, 1
 636
 637         /* If x is zero, return zero.  */
 638         or      a10, xh, xl
 639         beqz    a10, .Lmul_return_zero
 640
 641         /* Normalize x.  Adjust the exponent in a8.  */
 642         beqz    xh, .Lmul_xh_zero
 643         do_nsau a10, xh, a11, a12
 644         addi    a10, a10, -11
 645         ssl     a10
 646         src     xh, xh, xl
 647         sll     xl, xl
 648         movi    a8, 1
 649         sub     a8, a8, a10
 650         j       .Lmul_xnormalized
 651 .Lmul_xh_zero:
 652         do_nsau a10, xl, a11, a12
 653         addi    a10, a10, -11
 654         movi    a8, -31
 655         sub     a8, a8, a10
 656         ssl     a10
 657         bltz    a10, .Lmul_xl_srl
 658         sll     xh, xl
 659         movi    xl, 0
 660         j       .Lmul_xnormalized
 661 .Lmul_xl_srl:
 662         srl     xh, xl
 663         sll     xl, xl
 664         j       .Lmul_xnormalized
 665
 666 .Lmul_yexpzero:
 667         /* Clear the sign bit of y.  */
 668         slli    yh, yh, 1
 669         srli    yh, yh, 1
 670
 671         /* If y is zero, return zero.  */
 672         or      a10, yh, yl
 673         beqz    a10, .Lmul_return_zero
 674
 675         /* Normalize y.  Adjust the exponent in a9.  */
 676         beqz    yh, .Lmul_yh_zero
 677         do_nsau a10, yh, a11, a12
 678         addi    a10, a10, -11
 679         ssl     a10
 680         src     yh, yh, yl
 681         sll     yl, yl
 682         movi    a9, 1
 683         sub     a9, a9, a10
 684         j       .Lmul_ynormalized
 685 .Lmul_yh_zero:
 686         do_nsau a10, yl, a11, a12
 687         addi    a10, a10, -11
 688         movi    a9, -31
 689         sub     a9, a9, a10
 690         ssl     a10
 691         bltz    a10, .Lmul_yl_srl
 692         sll     yh, yl
 693         movi    yl, 0
 694         j       .Lmul_ynormalized
 695 .Lmul_yl_srl:
 696         srl     yh, yl
 697         sll     yl, yl
 698         j       .Lmul_ynormalized
 699
 700 .Lmul_return_zero:
 701         /* Return zero with the appropriate sign bit.  */
 702         srli    xh, a7, 31
 703         slli    xh, xh, 31
 704         movi    xl, 0
 705         j       .Lmul_done
 706
 707 .Lmul_xnan_or_inf:
 708         /* If y is zero, return NaN.  */
 709         bnez    yl, 1f
 710         slli    a8, yh, 1
 711         beqz    a8, .Lmul_return_nan
 712 1:
 713         /* If y is NaN, return y.  */
 714         bnall   yh, a6, .Lmul_returnx
 715         slli    a8, yh, 12
 716         or      a8, a8, yl
 717         beqz    a8, .Lmul_returnx
 718
 719 .Lmul_returny:
 720         mov     xh, yh
 721         mov     xl, yl
 722
 723 .Lmul_returnx:
 724         slli    a8, xh, 12
 725         or      a8, a8, xl
 726         bnez    a8, .Lmul_return_nan
 727         /* Set the sign bit and return.  */
 728         extui   a7, a7, 31, 1
 729         slli    xh, xh, 1
 730         ssai    1
 731         src     xh, a7, xh
 732         j       .Lmul_done
 733
 734 .Lmul_ynan_or_inf:
 735         /* If x is zero, return NaN.  */
 736         bnez    xl, .Lmul_returny
 737         slli    a8, xh, 1
 738         bnez    a8, .Lmul_returny
 739         mov     xh, yh
 740
 741 .Lmul_return_nan:
 742         movi    a4, 0x80000     /* make it a quiet NaN */
 743         or      xh, xh, a4
 744         j       .Lmul_done
 745
 746         .align  4
 747         .global __muldf3
 748         .type   __muldf3, @function
 749 __muldf3:
 750 #if __XTENSA_CALL0_ABI__
 751         leaf_entry sp, 32
 752         addi    sp, sp, -32
 753         s32i    a12, sp, 16
 754         s32i    a13, sp, 20
 755         s32i    a14, sp, 24
 756         s32i    a15, sp, 28
 757 #elif XCHAL_NO_MUL
 758         /* This is not really a leaf function; allocate enough stack space
 759            to allow CALL12s to a helper function.  */
 760         leaf_entry sp, 64
 761 #else
 762         leaf_entry sp, 32
 763 #endif
 764         movi    a6, 0x7ff00000
 765
 766         /* Get the sign of the result.  */
 767         xor     a7, xh, yh
 768
 769         /* Check for NaN and infinity.  */
 770         ball    xh, a6, .Lmul_xnan_or_inf
 771         ball    yh, a6, .Lmul_ynan_or_inf
 772
 773         /* Extract the exponents.  */
 774         extui   a8, xh, 20, 11
 775         extui   a9, yh, 20, 11
 776
 777         beqz    a8, .Lmul_xexpzero
 778 .Lmul_xnormalized:
 779         beqz    a9, .Lmul_yexpzero
 780 .Lmul_ynormalized:
 781
 782         /* Add the exponents.  */
 783         add     a8, a8, a9
 784
 785         /* Replace sign/exponent fields with explicit "1.0".  */
 786         movi    a10, 0x1fffff
 787         or      xh, xh, a6
 788         and     xh, xh, a10
 789         or      yh, yh, a6
 790         and     yh, yh, a10
 791
 792         /* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
 793            The least-significant word of the result is thrown away except
 794            that if it is nonzero, the lsb of a6 is set to 1.  */
 795 #if XCHAL_HAVE_MUL32_HIGH
 796
 797         /* Compute a6 with any carry-outs in a10.  */
 798         movi    a10, 0
 799         mull    a6, xl, yh
 800         mull    a11, xh, yl
 801         add     a6, a6, a11
 802         bgeu    a6, a11, 1f
 803         addi    a10, a10, 1
 804 1:
 805         muluh   a11, xl, yl
 806         add     a6, a6, a11
 807         bgeu    a6, a11, 1f
 808         addi    a10, a10, 1
 809 1:
 810         /* If the low word of the result is nonzero, set the lsb of a6.  */
 811         mull    a11, xl, yl
 812         beqz    a11, 1f
 813         movi    a9, 1
 814         or      a6, a6, a9
 815 1:
 816         /* Compute xl with any carry-outs in a9.  */
 817         movi    a9, 0
 818         mull    a11, xh, yh
 819         add     a10, a10, a11
 820         bgeu    a10, a11, 1f
 821         addi    a9, a9, 1
 822 1:
 823         muluh   a11, xh, yl
 824         add     a10, a10, a11
 825         bgeu    a10, a11, 1f
 826         addi    a9, a9, 1
 827 1:
 828         muluh   xl, xl, yh
 829         add     xl, xl, a10
 830         bgeu    xl, a10, 1f
 831         addi    a9, a9, 1
 832 1:
 833         /* Compute xh.  */
 834         muluh   xh, xh, yh
 835         add     xh, xh, a9
 836
 837 #else /* ! XCHAL_HAVE_MUL32_HIGH */
 838
 839         /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
 840            products.  These partial products are:
 841
 842                 0 xll * yll
 843
 844                 1 xll * ylh
 845                 2 xlh * yll
 846
 847                 3 xll * yhl
 848                 4 xlh * ylh
 849                 5 xhl * yll
 850
 851                 6 xll * yhh
 852                 7 xlh * yhl
 853                 8 xhl * ylh
 854                 9 xhh * yll
 855
 856                 10 xlh * yhh
 857                 11 xhl * yhl
 858                 12 xhh * ylh
 859
 860                 13 xhl * yhh
 861                 14 xhh * yhl
 862
 863                 15 xhh * yhh
 864
 865            where the input chunks are (hh, hl, lh, ll).  If using the Mul16
 866            or Mul32 multiplier options, these input chunks must be stored in
 867            separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
 868            that the inputs come from either half of the registers, so there
 869            is no need to shift them out ahead of time.  If there is no
 870            multiply hardware, the 16-bit chunks can be extracted when setting
 871            up the arguments to the separate multiply function.  */
 872
 873         /* Save a7 since it is needed to hold a temporary value.  */
 874         s32i    a7, sp, 4
 875 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
 876         /* Calling a separate multiply function will clobber a0 and requires
 877            use of a8 as a temporary, so save those values now.  (The function
 878            uses a custom ABI so nothing else needs to be saved.)  */
 879         s32i    a0, sp, 0
 880         s32i    a8, sp, 8
 881 #endif
 882
 883 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
 884
 885 #define xlh a12
 886 #define ylh a13
 887 #define xhh a14
 888 #define yhh a15
 889
 890         /* Get the high halves of the inputs into registers.  */
 891         srli    xlh, xl, 16
 892         srli    ylh, yl, 16
 893         srli    xhh, xh, 16
 894         srli    yhh, yh, 16
 895
 896 #define xll xl
 897 #define yll yl
 898 #define xhl xh
 899 #define yhl yh
 900
 901 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
 902         /* Clear the high halves of the inputs.  This does not matter
 903            for MUL16 because the high bits are ignored.  */
 904         extui   xl, xl, 0, 16
 905         extui   xh, xh, 0, 16
 906         extui   yl, yl, 0, 16
 907         extui   yh, yh, 0, 16
 908 #endif
 909 #endif /* MUL16 || MUL32 */
 910
 911
 912 #if XCHAL_HAVE_MUL16
 913
 914 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 915         mul16u  dst, xreg ## xhalf, yreg ## yhalf
 916
 917 #elif XCHAL_HAVE_MUL32
 918
 919 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 920         mull    dst, xreg ## xhalf, yreg ## yhalf
 921
 922 #elif XCHAL_HAVE_MAC16
 923
 924 /* The preprocessor insists on inserting a space when concatenating after
 925    a period in the definition of do_mul below.  These macros are a workaround
 926    using underscores instead of periods when doing the concatenation.  */
 927 #define umul_aa_ll umul.aa.ll
 928 #define umul_aa_lh umul.aa.lh
 929 #define umul_aa_hl umul.aa.hl
 930 #define umul_aa_hh umul.aa.hh
 931
 932 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 933         umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
 934         rsr     dst, ACCLO
 935
 936 #else /* no multiply hardware */
 937
 938 #define set_arg_l(dst, src) \
 939         extui   dst, src, 0, 16
 940 #define set_arg_h(dst, src) \
 941         srli    dst, src, 16
 942
 943 #if __XTENSA_CALL0_ABI__
 944 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 945         set_arg_ ## xhalf (a13, xreg); \
 946         set_arg_ ## yhalf (a14, yreg); \
 947         call0   .Lmul_mulsi3; \
 948         mov     dst, a12
 949 #else
 950 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 951         set_arg_ ## xhalf (a14, xreg); \
 952         set_arg_ ## yhalf (a15, yreg); \
 953         call12  .Lmul_mulsi3; \
 954         mov     dst, a14
 955 #endif /* __XTENSA_CALL0_ABI__ */
 956
 957 #endif /* no multiply hardware */
 958
 959         /* Add pp1 and pp2 into a10 with carry-out in a9.  */
 960         do_mul(a10, xl, l, yl, h)       /* pp 1 */
 961         do_mul(a11, xl, h, yl, l)       /* pp 2 */
 962         movi    a9, 0
 963         add     a10, a10, a11
 964         bgeu    a10, a11, 1f
 965         addi    a9, a9, 1
 966 1:
 967         /* Initialize a6 with a9/a10 shifted into position.  Note that
 968            this value can be safely incremented without any carry-outs.  */
 969         ssai    16
 970         src     a6, a9, a10
 971
 972         /* Compute the low word into a10.  */
 973         do_mul(a11, xl, l, yl, l)       /* pp 0 */
 974         sll     a10, a10
 975         add     a10, a10, a11
 976         bgeu    a10, a11, 1f
 977         addi    a6, a6, 1
 978 1:
 979         /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
 980            This is good enough to determine the low half of a6, so that any
 981            nonzero bits from the low word of the result can be collapsed
 982            into a6, freeing up a register.  */
 983         movi    a9, 0
 984         do_mul(a11, xl, l, yh, l)       /* pp 3 */
 985         add     a6, a6, a11
 986         bgeu    a6, a11, 1f
 987         addi    a9, a9, 1
 988 1:
 989         do_mul(a11, xl, h, yl, h)       /* pp 4 */
 990         add     a6, a6, a11
 991         bgeu    a6, a11, 1f
 992         addi    a9, a9, 1
 993 1:
 994         do_mul(a11, xh, l, yl, l)       /* pp 5 */
 995         add     a6, a6, a11
 996         bgeu    a6, a11, 1f
 997         addi    a9, a9, 1
 998 1:
 999         /* Collapse any nonzero bits from the low word into a6.  */
1000         beqz    a10, 1f
1001         movi    a11, 1
1002         or      a6, a6, a11
1003 1:
1004         /* Add pp6-9 into a11 with carry-outs in a10.  */
1005         do_mul(a7, xl, l, yh, h)        /* pp 6 */
1006         do_mul(a11, xh, h, yl, l)       /* pp 9 */
1007         movi    a10, 0
1008         add     a11, a11, a7
1009         bgeu    a11, a7, 1f
1010         addi    a10, a10, 1
1011 1:
1012         do_mul(a7, xl, h, yh, l)        /* pp 7 */
1013         add     a11, a11, a7
1014         bgeu    a11, a7, 1f
1015         addi    a10, a10, 1
1016 1:
1017         do_mul(a7, xh, l, yl, h)        /* pp 8 */
1018         add     a11, a11, a7
1019         bgeu    a11, a7, 1f
1020         addi    a10, a10, 1
1021 1:
1022         /* Shift a10/a11 into position, and add low half of a11 to a6.  */
1023         src     a10, a10, a11
1024         add     a10, a10, a9
1025         sll     a11, a11
1026         add     a6, a6, a11
1027         bgeu    a6, a11, 1f
1028         addi    a10, a10, 1
1029 1:
1030         /* Add pp10-12 into xl with carry-outs in a9.  */
1031         movi    a9, 0
1032         do_mul(xl, xl, h, yh, h)        /* pp 10 */
1033         add     xl, xl, a10
1034         bgeu    xl, a10, 1f
1035         addi    a9, a9, 1
1036 1:
1037         do_mul(a10, xh, l, yh, l)       /* pp 11 */
1038         add     xl, xl, a10
1039         bgeu    xl, a10, 1f
1040         addi    a9, a9, 1
1041 1:
1042         do_mul(a10, xh, h, yl, h)       /* pp 12 */
1043         add     xl, xl, a10
1044         bgeu    xl, a10, 1f
1045         addi    a9, a9, 1
1046 1:
1047         /* Add pp13-14 into a11 with carry-outs in a10.  */
1048         do_mul(a11, xh, l, yh, h)       /* pp 13 */
1049         do_mul(a7, xh, h, yh, l)        /* pp 14 */
1050         movi    a10, 0
1051         add     a11, a11, a7
1052         bgeu    a11, a7, 1f
1053         addi    a10, a10, 1
1054 1:
1055         /* Shift a10/a11 into position, and add low half of a11 to a6.  */
1056         src     a10, a10, a11
1057         add     a10, a10, a9
1058         sll     a11, a11
1059         add     xl, xl, a11
1060         bgeu    xl, a11, 1f
1061         addi    a10, a10, 1
1062 1:
1063         /* Compute xh.  */
1064         do_mul(xh, xh, h, yh, h)        /* pp 15 */
1065         add     xh, xh, a10
1066
1067         /* Restore values saved on the stack during the multiplication.  */
1068         l32i    a7, sp, 4
1069 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
1070         l32i    a0, sp, 0
1071         l32i    a8, sp, 8
1072 #endif
1073 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
1074
1075         /* Shift left by 12 bits, unless there was a carry-out from the
1076            multiply, in which case, shift by 11 bits and increment the
1077            exponent.  Note: It is convenient to use the constant 0x3ff
1078            instead of 0x400 when removing the extra exponent bias (so that
1079            it is easy to construct 0x7fe for the overflow check).  Reverse
1080            the logic here to decrement the exponent sum by one unless there
1081            was a carry-out.  */
1082         movi    a4, 11
1083         srli    a5, xh, 21 - 12
1084         bnez    a5, 1f
1085         addi    a4, a4, 1
1086         addi    a8, a8, -1
1087 1:      ssl     a4
1088         src     xh, xh, xl
1089         src     xl, xl, a6
1090         sll     a6, a6
1091
1092         /* Subtract the extra bias from the exponent sum (plus one to account
1093            for the explicit "1.0" of the mantissa that will be added to the
1094            exponent in the final result).  */
1095         movi    a4, 0x3ff
1096         sub     a8, a8, a4
1097
1098         /* Check for over/underflow.  The value in a8 is one less than the
1099            final exponent, so values in the range 0..7fd are OK here.  */
1100         slli    a4, a4, 1       /* 0x7fe */
1101         bgeu    a8, a4, .Lmul_overflow
1102
1103 .Lmul_round:
1104         /* Round.  */
1105         bgez    a6, .Lmul_rounded
1106         addi    xl, xl, 1
1107         beqz    xl, .Lmul_roundcarry
1108         slli    a6, a6, 1
1109         beqz    a6, .Lmul_exactlyhalf
1110
1111 .Lmul_rounded:
1112         /* Add the exponent to the mantissa.  */
1113         slli    a8, a8, 20
1114         add     xh, xh, a8
1115
1116 .Lmul_addsign:
1117         /* Add the sign bit.  */
1118         srli    a7, a7, 31
1119         slli    a7, a7, 31
1120         or      xh, xh, a7
1121
1122 .Lmul_done:
1123 #if __XTENSA_CALL0_ABI__
1124         l32i    a12, sp, 16
1125         l32i    a13, sp, 20
1126         l32i    a14, sp, 24
1127         l32i    a15, sp, 28
1128         addi    sp, sp, 32
1129 #endif
1130         leaf_return
1131
1132 .Lmul_exactlyhalf:
1133         /* Round down to the nearest even value.  */
1134         srli    xl, xl, 1
1135         slli    xl, xl, 1
1136         j       .Lmul_rounded
1137
1138 .Lmul_roundcarry:
1139         /* xl is always zero when the rounding increment overflows, so
1140            there's no need to round it to an even value.  */
1141         addi    xh, xh, 1
1142         /* Overflow is OK -- it will be added to the exponent.  */
1143         j       .Lmul_rounded
1144
1145 .Lmul_overflow:
1146         bltz    a8, .Lmul_underflow
1147         /* Return +/- Infinity.  */
1148         addi    a8, a4, 1       /* 0x7ff */
1149         slli    xh, a8, 20
1150         movi    xl, 0
1151         j       .Lmul_addsign
1152
1153 .Lmul_underflow:
1154         /* Create a subnormal value, where the exponent field contains zero,
1155            but the effective exponent is 1.  The value of a8 is one less than
1156            the actual exponent, so just negate it to get the shift amount.  */
1157         neg     a8, a8
1158         mov     a9, a6
1159         ssr     a8
1160         bgeui   a8, 32, .Lmul_bigshift
1161
1162         /* Shift xh/xl right.  Any bits that are shifted out of xl are saved
1163            in a6 (combined with the shifted-out bits currently in a6) for
1164            rounding the result.  */
1165         sll     a6, xl
1166         src     xl, xh, xl
1167         srl     xh, xh
1168         j       1f
1169
1170 .Lmul_bigshift:
1171         bgeui   a8, 64, .Lmul_flush_to_zero
1172         sll     a10, xl         /* lost bits shifted out of xl */
1173         src     a6, xh, xl
1174         srl     xl, xh
1175         movi    xh, 0
1176         or      a9, a9, a10
1177
1178         /* Set the exponent to zero.  */
1179 1:      movi    a8, 0
1180
1181         /* Pack any nonzero bits shifted out into a6.  */
1182         beqz    a9, .Lmul_round
1183         movi    a9, 1
1184         or      a6, a6, a9
1185         j       .Lmul_round
1186
1187 .Lmul_flush_to_zero:
1188         /* Return zero with the appropriate sign bit.  */
1189         srli    xh, a7, 31
1190         slli    xh, xh, 31
1191         movi    xl, 0
1192         j       .Lmul_done
1193
1194 #if XCHAL_NO_MUL
1195
1196         /* For Xtensa processors with no multiply hardware, this simplified
1197            version of _mulsi3 is used for multiplying 16-bit chunks of
1198            the floating-point mantissas.  When using CALL0, this function
1199            uses a custom ABI: the inputs are passed in a13 and a14, the
1200            result is returned in a12, and a8 and a15 are clobbered.  */
1201         .align  4
1202 .Lmul_mulsi3:
1203         leaf_entry sp, 16
1204         .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
1205         movi    \dst, 0
1206 1:      add     \tmp1, \src2, \dst
1207         extui   \tmp2, \src1, 0, 1
1208         movnez  \dst, \tmp1, \tmp2
1209
1210         do_addx2 \tmp1, \src2, \dst, \tmp1
1211         extui   \tmp2, \src1, 1, 1
1212         movnez  \dst, \tmp1, \tmp2
1213
1214         do_addx4 \tmp1, \src2, \dst, \tmp1
1215         extui   \tmp2, \src1, 2, 1
1216         movnez  \dst, \tmp1, \tmp2
1217
1218         do_addx8 \tmp1, \src2, \dst, \tmp1
1219         extui   \tmp2, \src1, 3, 1
1220         movnez  \dst, \tmp1, \tmp2
1221
1222         srli    \src1, \src1, 4
1223         slli    \src2, \src2, 4
1224         bnez    \src1, 1b
1225         .endm
1226 #if __XTENSA_CALL0_ABI__
1227         mul_mulsi3_body a12, a13, a14, a15, a8
1228 #else
1229         /* The result will be written into a2, so save that argument in a4.  */
1230         mov     a4, a2
1231         mul_mulsi3_body a2, a4, a3, a5, a6
1232 #endif
1233         leaf_return
1234 #endif /* XCHAL_NO_MUL */
1235 #endif /* L_muldf3 */
1236
1237 #ifdef L_divdf3
1238
1239         /* Division */
1240
1241 #if XCHAL_HAVE_DFP_DIV
1242
1243         .text
1244         .align 4
1245         .global __divdf3
1246         .type   __divdf3, @function
1247 __divdf3:
1248         leaf_entry      sp, 16
1249
1250         wfrd            f1, xh, xl
1251         wfrd            f2, yh, yl
1252
1253         div0.d          f3, f2
1254         nexp01.d        f4, f2
1255         const.d         f0, 1
1256         maddn.d         f0, f4, f3
1257         const.d         f5, 0
1258         mov.d           f7, f2
1259         mkdadj.d        f7, f1
1260         maddn.d         f3, f0, f3
1261         maddn.d         f5, f0, f0
1262         nexp01.d        f1, f1
1263         div0.d          f2, f2
1264         maddn.d         f3, f5, f3
1265         const.d         f5, 1
1266         const.d         f0, 0
1267         neg.d           f6, f1
1268         maddn.d         f5, f4, f3
1269         maddn.d         f0, f6, f2
1270         maddn.d         f3, f5, f3
1271         maddn.d         f6, f4, f0
1272         const.d         f2, 1
1273         maddn.d         f2, f4, f3
1274         maddn.d         f0, f6, f3
1275         neg.d           f1, f1
1276         maddn.d         f3, f2, f3
1277         maddn.d         f1, f4, f0
1278         addexpm.d       f0, f7
1279         addexp.d        f3, f7
1280         divn.d          f0, f1, f3
1281
1282         rfr             xl, f0
1283         rfrd            xh, f0
1284
1285         leaf_return
1286
1287 #else
1288
1289         .literal_position
1290
1291 __divdf3_aux:
1292
1293         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
1294            (This code is placed before the start of the function just to
1295            keep it in range of the limited branch displacements.)  */
1296
1297 .Ldiv_yexpzero:
1298         /* Clear the sign bit of y.  */
1299         slli    yh, yh, 1
1300         srli    yh, yh, 1
1301
1302         /* Check for division by zero.  */
1303         or      a10, yh, yl
1304         beqz    a10, .Ldiv_yzero
1305
1306         /* Normalize y.  Adjust the exponent in a9.  */
1307         beqz    yh, .Ldiv_yh_zero
1308         do_nsau a10, yh, a11, a9
1309         addi    a10, a10, -11
1310         ssl     a10
1311         src     yh, yh, yl
1312         sll     yl, yl
1313         movi    a9, 1
1314         sub     a9, a9, a10
1315         j       .Ldiv_ynormalized
1316 .Ldiv_yh_zero:
1317         do_nsau a10, yl, a11, a9
1318         addi    a10, a10, -11
1319         movi    a9, -31
1320         sub     a9, a9, a10
1321         ssl     a10
1322         bltz    a10, .Ldiv_yl_srl
1323         sll     yh, yl
1324         movi    yl, 0
1325         j       .Ldiv_ynormalized
1326 .Ldiv_yl_srl:
1327         srl     yh, yl
1328         sll     yl, yl
1329         j       .Ldiv_ynormalized
1330
1331 .Ldiv_yzero:
1332         /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
1333         slli    xh, xh, 1
1334         srli    xh, xh, 1
1335         or      xl, xl, xh
1336         srli    xh, a7, 31
1337         slli    xh, xh, 31
1338         or      xh, xh, a6
1339         bnez    xl, 1f
1340         movi    a4, 0x80000     /* make it a quiet NaN */
1341         or      xh, xh, a4
1342 1:      movi    xl, 0
1343         leaf_return
1344
1345 .Ldiv_xexpzero:
1346         /* Clear the sign bit of x.  */
1347         slli    xh, xh, 1
1348         srli    xh, xh, 1
1349
1350         /* If x is zero, return zero.  */
1351         or      a10, xh, xl
1352         beqz    a10, .Ldiv_return_zero
1353
1354         /* Normalize x.  Adjust the exponent in a8.  */
1355         beqz    xh, .Ldiv_xh_zero
1356         do_nsau a10, xh, a11, a8
1357         addi    a10, a10, -11
1358         ssl     a10
1359         src     xh, xh, xl
1360         sll     xl, xl
1361         movi    a8, 1
1362         sub     a8, a8, a10
1363         j       .Ldiv_xnormalized
1364 .Ldiv_xh_zero:
1365         do_nsau a10, xl, a11, a8
1366         addi    a10, a10, -11
1367         movi    a8, -31
1368         sub     a8, a8, a10
1369         ssl     a10
1370         bltz    a10, .Ldiv_xl_srl
1371         sll     xh, xl
1372         movi    xl, 0
1373         j       .Ldiv_xnormalized
1374 .Ldiv_xl_srl:
1375         srl     xh, xl
1376         sll     xl, xl
1377         j       .Ldiv_xnormalized
1378
1379 .Ldiv_return_zero:
1380         /* Return zero with the appropriate sign bit.  */
1381         srli    xh, a7, 31
1382         slli    xh, xh, 31
1383         movi    xl, 0
1384         leaf_return
1385
1386 .Ldiv_xnan_or_inf:
1387         /* Set the sign bit of the result.  */
1388         srli    a7, yh, 31
1389         slli    a7, a7, 31
1390         xor     xh, xh, a7
1391         /* If y is NaN or Inf, return NaN.  */
1392         ball    yh, a6, .Ldiv_return_nan
1393         slli    a8, xh, 12
1394         or      a8, a8, xl
1395         bnez    a8, .Ldiv_return_nan
1396         leaf_return
1397
1398 .Ldiv_ynan_or_inf:
1399         /* If y is Infinity, return zero.  */
1400         slli    a8, yh, 12
1401         or      a8, a8, yl
1402         beqz    a8, .Ldiv_return_zero
1403         /* y is NaN; return it.  */
1404         mov     xh, yh
1405         mov     xl, yl
1406
1407 .Ldiv_return_nan:
1408         movi    a4, 0x80000     /* make it a quiet NaN */
1409         or      xh, xh, a4
1410         leaf_return
1411
1412 .Ldiv_highequal1:
1413         bltu    xl, yl, 2f
1414         j       3f
1415
1416         .align  4
1417         .global __divdf3
1418         .type   __divdf3, @function
1419 __divdf3:
1420         leaf_entry sp, 16
1421         movi    a6, 0x7ff00000
1422
1423         /* Get the sign of the result.  */
1424         xor     a7, xh, yh
1425
1426         /* Check for NaN and infinity.  */
1427         ball    xh, a6, .Ldiv_xnan_or_inf
1428         ball    yh, a6, .Ldiv_ynan_or_inf
1429
1430         /* Extract the exponents.  */
1431         extui   a8, xh, 20, 11
1432         extui   a9, yh, 20, 11
1433
1434         beqz    a9, .Ldiv_yexpzero
1435 .Ldiv_ynormalized:
1436         beqz    a8, .Ldiv_xexpzero
1437 .Ldiv_xnormalized:
1438
1439         /* Subtract the exponents.  */
1440         sub     a8, a8, a9
1441
1442         /* Replace sign/exponent fields with explicit "1.0".  */
1443         movi    a10, 0x1fffff
1444         or      xh, xh, a6
1445         and     xh, xh, a10
1446         or      yh, yh, a6
1447         and     yh, yh, a10
1448
1449         /* Set SAR for left shift by one.  */
1450         ssai    (32 - 1)
1451
1452         /* The first digit of the mantissa division must be a one.
1453            Shift x (and adjust the exponent) as needed to make this true.  */
1454         bltu    yh, xh, 3f
1455         beq     yh, xh, .Ldiv_highequal1
1456 2:      src     xh, xh, xl
1457         sll     xl, xl
1458         addi    a8, a8, -1
1459 3:
1460         /* Do the first subtraction and shift.  */
1461         sub     xh, xh, yh
1462         bgeu    xl, yl, 1f
1463         addi    xh, xh, -1
1464 1:      sub     xl, xl, yl
1465         src     xh, xh, xl
1466         sll     xl, xl
1467
1468         /* Put the quotient into a10/a11.  */
1469         movi    a10, 0
1470         movi    a11, 1
1471
1472         /* Divide one bit at a time for 52 bits.  */
1473         movi    a9, 52
1474 #if XCHAL_HAVE_LOOPS
1475         loop    a9, .Ldiv_loopend
1476 #endif
1477 .Ldiv_loop:
1478         /* Shift the quotient << 1.  */
1479         src     a10, a10, a11
1480         sll     a11, a11
1481
1482         /* Is this digit a 0 or 1?  */
1483         bltu    xh, yh, 3f
1484         beq     xh, yh, .Ldiv_highequal2
1485
1486         /* Output a 1 and subtract.  */
1487 2:      addi    a11, a11, 1
1488         sub     xh, xh, yh
1489         bgeu    xl, yl, 1f
1490         addi    xh, xh, -1
1491 1:      sub     xl, xl, yl
1492
1493         /* Shift the dividend << 1.  */
1494 3:      src     xh, xh, xl
1495         sll     xl, xl
1496
1497 #if !XCHAL_HAVE_LOOPS
1498         addi    a9, a9, -1
1499         bnez    a9, .Ldiv_loop
1500 #endif
1501 .Ldiv_loopend:
1502
1503         /* Add the exponent bias (less one to account for the explicit "1.0"
1504            of the mantissa that will be added to the exponent in the final
1505            result).  */
1506         movi    a9, 0x3fe
1507         add     a8, a8, a9
1508
1509         /* Check for over/underflow.  The value in a8 is one less than the
1510            final exponent, so values in the range 0..7fd are OK here.  */
1511         addmi   a9, a9, 0x400   /* 0x7fe */
1512         bgeu    a8, a9, .Ldiv_overflow
1513
1514 .Ldiv_round:
1515         /* Round.  The remainder (<< 1) is in xh/xl.  */
1516         bltu    xh, yh, .Ldiv_rounded
1517         beq     xh, yh, .Ldiv_highequal3
1518 .Ldiv_roundup:
1519         addi    a11, a11, 1
1520         beqz    a11, .Ldiv_roundcarry
1521
1522 .Ldiv_rounded:
1523         mov     xl, a11
1524         /* Add the exponent to the mantissa.  */
1525         slli    a8, a8, 20
1526         add     xh, a10, a8
1527
1528 .Ldiv_addsign:
1529         /* Add the sign bit.  */
1530         srli    a7, a7, 31
1531         slli    a7, a7, 31
1532         or      xh, xh, a7
1533         leaf_return
1534
1535 .Ldiv_highequal2:
1536         bgeu    xl, yl, 2b
1537         j       3b
1538
1539 .Ldiv_highequal3:
1540         bltu    xl, yl, .Ldiv_rounded
1541         bne     xl, yl, .Ldiv_roundup
1542
1543         /* Remainder is exactly half the divisor.  Round even.  */
1544         addi    a11, a11, 1
1545         beqz    a11, .Ldiv_roundcarry
1546         srli    a11, a11, 1
1547         slli    a11, a11, 1
1548         j       .Ldiv_rounded
1549
1550 .Ldiv_overflow:
1551         bltz    a8, .Ldiv_underflow
1552         /* Return +/- Infinity.  */
1553         addi    a8, a9, 1       /* 0x7ff */
1554         slli    xh, a8, 20
1555         movi    xl, 0
1556         j       .Ldiv_addsign
1557
1558 .Ldiv_underflow:
1559         /* Create a subnormal value, where the exponent field contains zero,
1560            but the effective exponent is 1.  The value of a8 is one less than
1561            the actual exponent, so just negate it to get the shift amount.  */
1562         neg     a8, a8
1563         ssr     a8
1564         bgeui   a8, 32, .Ldiv_bigshift
1565
1566         /* Shift a10/a11 right.  Any bits that are shifted out of a11 are
1567            saved in a6 for rounding the result.  */
1568         sll     a6, a11
1569         src     a11, a10, a11
1570         srl     a10, a10
1571         j       1f
1572
1573 .Ldiv_bigshift:
1574         bgeui   a8, 64, .Ldiv_flush_to_zero
1575         sll     a9, a11         /* lost bits shifted out of a11 */
1576         src     a6, a10, a11
1577         srl     a11, a10
1578         movi    a10, 0
1579         or      xl, xl, a9
1580
1581         /* Set the exponent to zero.  */
1582 1:      movi    a8, 0
1583
1584         /* Pack any nonzero remainder (in xh/xl) into a6.  */
1585         or      xh, xh, xl
1586         beqz    xh, 1f
1587         movi    a9, 1
1588         or      a6, a6, a9
1589
1590         /* Round a10/a11 based on the bits shifted out into a6.  */
1591 1:      bgez    a6, .Ldiv_rounded
1592         addi    a11, a11, 1
1593         beqz    a11, .Ldiv_roundcarry
1594         slli    a6, a6, 1
1595         bnez    a6, .Ldiv_rounded
1596         srli    a11, a11, 1
1597         slli    a11, a11, 1
1598         j       .Ldiv_rounded
1599
1600 .Ldiv_roundcarry:
1601         /* a11 is always zero when the rounding increment overflows, so
1602            there's no need to round it to an even value.  */
1603         addi    a10, a10, 1
1604         /* Overflow to the exponent field is OK.  */
1605         j       .Ldiv_rounded
1606
1607 .Ldiv_flush_to_zero:
1608         /* Return zero with the appropriate sign bit.  */
1609         srli    xh, a7, 31
1610         slli    xh, xh, 31
1611         movi    xl, 0
1612         leaf_return
1613
1614 #endif /* XCHAL_HAVE_DFP_DIV */
1615
1616 #endif /* L_divdf3 */
1617
1618 #ifdef L_cmpdf2
1619
1620         /* Equal and Not Equal */
1621
1622         .align  4
1623         .global __eqdf2
1624         .global __nedf2
1625         .set    __nedf2, __eqdf2
1626         .type   __eqdf2, @function
1627 __eqdf2:
1628         leaf_entry sp, 16
1629         bne     xl, yl, 2f
1630         bne     xh, yh, 4f
1631
1632         /* The values are equal but NaN != NaN.  Check the exponent.  */
1633         movi    a6, 0x7ff00000
1634         ball    xh, a6, 3f
1635
1636         /* Equal.  */
1637         movi    a2, 0
1638         leaf_return
1639
1640         /* Not equal.  */
1641 2:      movi    a2, 1
1642         leaf_return
1643
1644         /* Check if the mantissas are nonzero.  */
1645 3:      slli    a7, xh, 12
1646         or      a7, a7, xl
1647         j       5f
1648
1649         /* Check if x and y are zero with different signs.  */
1650 4:      or      a7, xh, yh
1651         slli    a7, a7, 1
1652         or      a7, a7, xl      /* xl == yl here */
1653
1654         /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1655            or x when exponent(x) = 0x7ff and x == y.  */
1656 5:      movi    a2, 0
1657         movi    a3, 1
1658         movnez  a2, a3, a7
1659         leaf_return
1660
1661
1662         /* Greater Than */
1663
1664         .align  4
1665         .global __gtdf2
1666         .type   __gtdf2, @function
1667 __gtdf2:
1668         leaf_entry sp, 16
1669         movi    a6, 0x7ff00000
1670         ball    xh, a6, 2f
1671 1:      bnall   yh, a6, .Lle_cmp
1672
1673         /* Check if y is a NaN.  */
1674         slli    a7, yh, 12
1675         or      a7, a7, yl
1676         beqz    a7, .Lle_cmp
1677         movi    a2, 0
1678         leaf_return
1679
1680         /* Check if x is a NaN.  */
1681 2:      slli    a7, xh, 12
1682         or      a7, a7, xl
1683         beqz    a7, 1b
1684         movi    a2, 0
1685         leaf_return
1686
1687
1688         /* Less Than or Equal */
1689
1690         .align  4
1691         .global __ledf2
1692         .type   __ledf2, @function
1693 __ledf2:
1694         leaf_entry sp, 16
1695         movi    a6, 0x7ff00000
1696         ball    xh, a6, 2f
1697 1:      bnall   yh, a6, .Lle_cmp
1698
1699         /* Check if y is a NaN.  */
1700         slli    a7, yh, 12
1701         or      a7, a7, yl
1702         beqz    a7, .Lle_cmp
1703         movi    a2, 1
1704         leaf_return
1705
1706         /* Check if x is a NaN.  */
1707 2:      slli    a7, xh, 12
1708         or      a7, a7, xl
1709         beqz    a7, 1b
1710         movi    a2, 1
1711         leaf_return
1712
1713 .Lle_cmp:
1714         /* Check if x and y have different signs.  */
1715         xor     a7, xh, yh
1716         bltz    a7, .Lle_diff_signs
1717
1718         /* Check if x is negative.  */
1719         bltz    xh, .Lle_xneg
1720
1721         /* Check if x <= y.  */
1722         bltu    xh, yh, 4f
1723         bne     xh, yh, 5f
1724         bltu    yl, xl, 5f
1725 4:      movi    a2, 0
1726         leaf_return
1727
1728 .Lle_xneg:
1729         /* Check if y <= x.  */
1730         bltu    yh, xh, 4b
1731         bne     yh, xh, 5f
1732         bgeu    xl, yl, 4b
1733 5:      movi    a2, 1
1734         leaf_return
1735
1736 .Lle_diff_signs:
1737         bltz    xh, 4b
1738
1739         /* Check if both x and y are zero.  */
1740         or      a7, xh, yh
1741         slli    a7, a7, 1
1742         or      a7, a7, xl
1743         or      a7, a7, yl
1744         movi    a2, 1
1745         movi    a3, 0
1746         moveqz  a2, a3, a7
1747         leaf_return
1748
1749
1750         /* Greater Than or Equal */
1751
1752         .align  4
1753         .global __gedf2
1754         .type   __gedf2, @function
1755 __gedf2:
1756         leaf_entry sp, 16
1757         movi    a6, 0x7ff00000
1758         ball    xh, a6, 2f
1759 1:      bnall   yh, a6, .Llt_cmp
1760
1761         /* Check if y is a NaN.  */
1762         slli    a7, yh, 12
1763         or      a7, a7, yl
1764         beqz    a7, .Llt_cmp
1765         movi    a2, -1
1766         leaf_return
1767
1768         /* Check if x is a NaN.  */
1769 2:      slli    a7, xh, 12
1770         or      a7, a7, xl
1771         beqz    a7, 1b
1772         movi    a2, -1
1773         leaf_return
1774
1775
1776         /* Less Than */
1777
1778         .align  4
1779         .global __ltdf2
1780         .type   __ltdf2, @function
1781 __ltdf2:
1782         leaf_entry sp, 16
1783         movi    a6, 0x7ff00000
1784         ball    xh, a6, 2f
1785 1:      bnall   yh, a6, .Llt_cmp
1786
1787         /* Check if y is a NaN.  */
1788         slli    a7, yh, 12
1789         or      a7, a7, yl
1790         beqz    a7, .Llt_cmp
1791         movi    a2, 0
1792         leaf_return
1793
1794         /* Check if x is a NaN.  */
1795 2:      slli    a7, xh, 12
1796         or      a7, a7, xl
1797         beqz    a7, 1b
1798         movi    a2, 0
1799         leaf_return
1800
1801 .Llt_cmp:
1802         /* Check if x and y have different signs.  */
1803         xor     a7, xh, yh
1804         bltz    a7, .Llt_diff_signs
1805
1806         /* Check if x is negative.  */
1807         bltz    xh, .Llt_xneg
1808
1809         /* Check if x < y.  */
1810         bltu    xh, yh, 4f
1811         bne     xh, yh, 5f
1812         bgeu    xl, yl, 5f
1813 4:      movi    a2, -1
1814         leaf_return
1815
1816 .Llt_xneg:
1817         /* Check if y < x.  */
1818         bltu    yh, xh, 4b
1819         bne     yh, xh, 5f
1820         bltu    yl, xl, 4b
1821 5:      movi    a2, 0
1822         leaf_return
1823
1824 .Llt_diff_signs:
1825         bgez    xh, 5b
1826
1827         /* Check if both x and y are nonzero.  */
1828         or      a7, xh, yh
1829         slli    a7, a7, 1
1830         or      a7, a7, xl
1831         or      a7, a7, yl
1832         movi    a2, 0
1833         movi    a3, -1
1834         movnez  a2, a3, a7
1835         leaf_return
1836
1837
1838         /* Unordered */
1839
1840         .align  4
1841         .global __unorddf2
1842         .type   __unorddf2, @function
1843 __unorddf2:
1844         leaf_entry sp, 16
1845         movi    a6, 0x7ff00000
1846         ball    xh, a6, 3f
1847 1:      ball    yh, a6, 4f
1848 2:      movi    a2, 0
1849         leaf_return
1850
1851 3:      slli    a7, xh, 12
1852         or      a7, a7, xl
1853         beqz    a7, 1b
1854         movi    a2, 1
1855         leaf_return
1856
1857 4:      slli    a7, yh, 12
1858         or      a7, a7, yl
1859         beqz    a7, 2b
1860         movi    a2, 1
1861         leaf_return
1862
1863 #endif /* L_cmpdf2 */
1864
1865 #ifdef L_fixdfsi
1866
1867         .align  4
1868         .global __fixdfsi
1869         .type   __fixdfsi, @function
1870 __fixdfsi:
1871         leaf_entry sp, 16
1872
1873         /* Check for NaN and Infinity.  */
1874         movi    a6, 0x7ff00000
1875         ball    xh, a6, .Lfixdfsi_nan_or_inf
1876
1877         /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
1878         extui   a4, xh, 20, 11
1879         extui   a5, a6, 19, 10  /* 0x3fe */
1880         sub     a4, a4, a5
1881         bgei    a4, 32, .Lfixdfsi_maxint
1882         blti    a4, 1, .Lfixdfsi_zero
1883
1884         /* Add explicit "1.0" and shift << 11.  */
1885         or      a7, xh, a6
1886         ssai    (32 - 11)
1887         src     a5, a7, xl
1888
1889         /* Shift back to the right, based on the exponent.  */
1890         ssl     a4              /* shift by 32 - a4 */
1891         srl     a5, a5
1892
1893         /* Negate the result if sign != 0.  */
1894         neg     a2, a5
1895         movgez  a2, a5, a7
1896         leaf_return
1897
1898 .Lfixdfsi_nan_or_inf:
1899         /* Handle Infinity and NaN.  */
1900         slli    a4, xh, 12
1901         or      a4, a4, xl
1902         beqz    a4, .Lfixdfsi_maxint
1903
1904         /* Translate NaN to +maxint.  */
1905         movi    xh, 0
1906
1907 .Lfixdfsi_maxint:
1908         slli    a4, a6, 11      /* 0x80000000 */
1909         addi    a5, a4, -1      /* 0x7fffffff */
1910         movgez  a4, a5, xh
1911         mov     a2, a4
1912         leaf_return
1913
1914 .Lfixdfsi_zero:
1915         movi    a2, 0
1916         leaf_return
1917
1918 #endif /* L_fixdfsi */
1919
1920 #ifdef L_fixdfdi
1921
1922         .align  4
1923         .global __fixdfdi
1924         .type   __fixdfdi, @function
1925 __fixdfdi:
1926         leaf_entry sp, 16
1927
1928         /* Check for NaN and Infinity.  */
1929         movi    a6, 0x7ff00000
1930         ball    xh, a6, .Lfixdfdi_nan_or_inf
1931
1932         /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
1933         extui   a4, xh, 20, 11
1934         extui   a5, a6, 19, 10  /* 0x3fe */
1935         sub     a4, a4, a5
1936         bgei    a4, 64, .Lfixdfdi_maxint
1937         blti    a4, 1, .Lfixdfdi_zero
1938
1939         /* Add explicit "1.0" and shift << 11.  */
1940         or      a7, xh, a6
1941         ssai    (32 - 11)
1942         src     xh, a7, xl
1943         sll     xl, xl
1944
1945         /* Shift back to the right, based on the exponent.  */
1946         ssl     a4              /* shift by 64 - a4 */
1947         bgei    a4, 32, .Lfixdfdi_smallshift
1948         srl     xl, xh
1949         movi    xh, 0
1950
1951 .Lfixdfdi_shifted:
1952         /* Negate the result if sign != 0.  */
1953         bgez    a7, 1f
1954         neg     xl, xl
1955         neg     xh, xh
1956         beqz    xl, 1f
1957         addi    xh, xh, -1
1958 1:      leaf_return
1959
1960 .Lfixdfdi_smallshift:
1961         src     xl, xh, xl
1962         srl     xh, xh
1963         j       .Lfixdfdi_shifted
1964
1965 .Lfixdfdi_nan_or_inf:
1966         /* Handle Infinity and NaN.  */
1967         slli    a4, xh, 12
1968         or      a4, a4, xl
1969         beqz    a4, .Lfixdfdi_maxint
1970
1971         /* Translate NaN to +maxint.  */
1972         movi    xh, 0
1973
1974 .Lfixdfdi_maxint:
1975         slli    a7, a6, 11      /* 0x80000000 */
1976         bgez    xh, 1f
1977         mov     xh, a7
1978         movi    xl, 0
1979         leaf_return
1980
1981 1:      addi    xh, a7, -1      /* 0x7fffffff */
1982         movi    xl, -1
1983         leaf_return
1984
1985 .Lfixdfdi_zero:
1986         movi    xh, 0
1987         movi    xl, 0
1988         leaf_return
1989
1990 #endif /* L_fixdfdi */
1991
1992 #ifdef L_fixunsdfsi
1993
1994         .align  4
1995         .global __fixunsdfsi
1996         .type   __fixunsdfsi, @function
1997 __fixunsdfsi:
1998         leaf_entry sp, 16
1999
2000         /* Check for NaN and Infinity.  */
2001         movi    a6, 0x7ff00000
2002         ball    xh, a6, .Lfixunsdfsi_nan_or_inf
2003
2004         /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
2005         extui   a4, xh, 20, 11
2006         extui   a5, a6, 20, 10  /* 0x3ff */
2007         sub     a4, a4, a5
2008         bgei    a4, 32, .Lfixunsdfsi_maxint
2009         bltz    a4, .Lfixunsdfsi_zero
2010
2011         /* Add explicit "1.0" and shift << 11.  */
2012         or      a7, xh, a6
2013         ssai    (32 - 11)
2014         src     a5, a7, xl
2015
2016         /* Shift back to the right, based on the exponent.  */
2017         addi    a4, a4, 1
2018         beqi    a4, 32, .Lfixunsdfsi_bigexp
2019         ssl     a4              /* shift by 32 - a4 */
2020         srl     a5, a5
2021
2022         /* Negate the result if sign != 0.  */
2023         neg     a2, a5
2024         movgez  a2, a5, a7
2025         leaf_return
2026
2027 .Lfixunsdfsi_nan_or_inf:
2028         /* Handle Infinity and NaN.  */
2029         slli    a4, xh, 12
2030         or      a4, a4, xl
2031         beqz    a4, .Lfixunsdfsi_maxint
2032
2033         /* Translate NaN to 0xffffffff.  */
2034         movi    a2, -1
2035         leaf_return
2036
2037 .Lfixunsdfsi_maxint:
2038         slli    a4, a6, 11      /* 0x80000000 */
2039         movi    a5, -1          /* 0xffffffff */
2040         movgez  a4, a5, xh
2041         mov     a2, a4
2042         leaf_return
2043
2044 .Lfixunsdfsi_zero:
2045         movi    a2, 0
2046         leaf_return
2047
2048 .Lfixunsdfsi_bigexp:
2049         /* Handle unsigned maximum exponent case.  */
2050         bltz    xh, 1f
2051         mov     a2, a5          /* no shift needed */
2052         leaf_return
2053
2054         /* Return 0x80000000 if negative.  */
2055 1:      slli    a2, a6, 11
2056         leaf_return
2057
2058 #endif /* L_fixunsdfsi */
2059
2060 #ifdef L_fixunsdfdi
2061
2062         .align  4
2063         .global __fixunsdfdi
2064         .type   __fixunsdfdi, @function
2065 __fixunsdfdi:
2066         leaf_entry sp, 16
2067
2068         /* Check for NaN and Infinity.  */
2069         movi    a6, 0x7ff00000
2070         ball    xh, a6, .Lfixunsdfdi_nan_or_inf
2071
2072         /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
2073         extui   a4, xh, 20, 11
2074         extui   a5, a6, 20, 10  /* 0x3ff */
2075         sub     a4, a4, a5
2076         bgei    a4, 64, .Lfixunsdfdi_maxint
2077         bltz    a4, .Lfixunsdfdi_zero
2078
2079         /* Add explicit "1.0" and shift << 11.  */
2080         or      a7, xh, a6
2081         ssai    (32 - 11)
2082         src     xh, a7, xl
2083         sll     xl, xl
2084
2085         /* Shift back to the right, based on the exponent.  */
2086         addi    a4, a4, 1
2087         beqi    a4, 64, .Lfixunsdfdi_bigexp
2088         ssl     a4              /* shift by 64 - a4 */
2089         bgei    a4, 32, .Lfixunsdfdi_smallshift
2090         srl     xl, xh
2091         movi    xh, 0
2092
2093 .Lfixunsdfdi_shifted:
2094         /* Negate the result if sign != 0.  */
2095         bgez    a7, 1f
2096         neg     xl, xl
2097         neg     xh, xh
2098         beqz    xl, 1f
2099         addi    xh, xh, -1
2100 1:      leaf_return
2101
2102 .Lfixunsdfdi_smallshift:
2103         src     xl, xh, xl
2104         srl     xh, xh
2105         j       .Lfixunsdfdi_shifted
2106
2107 .Lfixunsdfdi_nan_or_inf:
2108         /* Handle Infinity and NaN.  */
2109         slli    a4, xh, 12
2110         or      a4, a4, xl
2111         beqz    a4, .Lfixunsdfdi_maxint
2112
2113         /* Translate NaN to 0xffffffff.... */
2114 1:      movi    xh, -1
2115         movi    xl, -1
2116         leaf_return
2117
2118 .Lfixunsdfdi_maxint:
2119         bgez    xh, 1b
2120 2:      slli    xh, a6, 11      /* 0x80000000 */
2121         movi    xl, 0
2122         leaf_return
2123
2124 .Lfixunsdfdi_zero:
2125         movi    xh, 0
2126         movi    xl, 0
2127         leaf_return
2128
2129 .Lfixunsdfdi_bigexp:
2130         /* Handle unsigned maximum exponent case.  */
2131         bltz    a7, 2b
2132         leaf_return             /* no shift needed */
2133
2134 #endif /* L_fixunsdfdi */
2135
2136 #ifdef L_floatsidf
2137
2138         .align  4
2139         .global __floatunsidf
2140         .type   __floatunsidf, @function
2141 __floatunsidf:
2142         leaf_entry sp, 16
2143         beqz    a2, .Lfloatsidf_return_zero
2144
2145         /* Set the sign to zero and jump to the floatsidf code.  */
2146         movi    a7, 0
2147         j       .Lfloatsidf_normalize
2148
2149         .align  4
2150         .global __floatsidf
2151         .type   __floatsidf, @function
2152 __floatsidf:
2153         leaf_entry sp, 16
2154
2155         /* Check for zero.  */
2156         beqz    a2, .Lfloatsidf_return_zero
2157
2158         /* Save the sign.  */
2159         extui   a7, a2, 31, 1
2160
2161         /* Get the absolute value.  */
2162 #if XCHAL_HAVE_ABS
2163         abs     a2, a2
2164 #else
2165         neg     a4, a2
2166         movltz  a2, a4, a2
2167 #endif
2168
2169 .Lfloatsidf_normalize:
2170         /* Normalize with the first 1 bit in the msb.  */
2171         do_nsau a4, a2, a5, a6
2172         ssl     a4
2173         sll     a5, a2
2174
2175         /* Shift the mantissa into position.  */
2176         srli    xh, a5, 11
2177         slli    xl, a5, (32 - 11)
2178
2179         /* Set the exponent.  */
2180         movi    a5, 0x41d       /* 0x3fe + 31 */
2181         sub     a5, a5, a4
2182         slli    a5, a5, 20
2183         add     xh, xh, a5
2184
2185         /* Add the sign and return. */
2186         slli    a7, a7, 31
2187         or      xh, xh, a7
2188         leaf_return
2189
2190 .Lfloatsidf_return_zero:
2191         movi    a3, 0
2192         leaf_return
2193
2194 #endif /* L_floatsidf */
2195
2196 #ifdef L_floatdidf
2197
2198         .align  4
2199         .global __floatundidf
2200         .type   __floatundidf, @function
2201 __floatundidf:
2202         leaf_entry sp, 16
2203
2204         /* Check for zero.  */
2205         or      a4, xh, xl
2206         beqz    a4, 2f
2207
2208         /* Set the sign to zero and jump to the floatdidf code.  */
2209         movi    a7, 0
2210         j       .Lfloatdidf_normalize
2211
2212         .align  4
2213         .global __floatdidf
2214         .type   __floatdidf, @function
2215 __floatdidf:
2216         leaf_entry sp, 16
2217
2218         /* Check for zero.  */
2219         or      a4, xh, xl
2220         beqz    a4, 2f
2221
2222         /* Save the sign.  */
2223         extui   a7, xh, 31, 1
2224
2225         /* Get the absolute value.  */
2226         bgez    xh, .Lfloatdidf_normalize
2227         neg     xl, xl
2228         neg     xh, xh
2229         beqz    xl, .Lfloatdidf_normalize
2230         addi    xh, xh, -1
2231
2232 .Lfloatdidf_normalize:
2233         /* Normalize with the first 1 bit in the msb of xh.  */
2234         beqz    xh, .Lfloatdidf_bigshift
2235         do_nsau a4, xh, a5, a6
2236         ssl     a4
2237         src     xh, xh, xl
2238         sll     xl, xl
2239
2240 .Lfloatdidf_shifted:
2241         /* Shift the mantissa into position, with rounding bits in a6.  */
2242         ssai    11
2243         sll     a6, xl
2244         src     xl, xh, xl
2245         srl     xh, xh
2246
2247         /* Set the exponent.  */
2248         movi    a5, 0x43d       /* 0x3fe + 63 */
2249         sub     a5, a5, a4
2250         slli    a5, a5, 20
2251         add     xh, xh, a5
2252
2253         /* Add the sign.  */
2254         slli    a7, a7, 31
2255         or      xh, xh, a7
2256
2257         /* Round up if the leftover fraction is >= 1/2.  */
2258         bgez    a6, 2f
2259         addi    xl, xl, 1
2260         beqz    xl, .Lfloatdidf_roundcarry
2261
2262         /* Check if the leftover fraction is exactly 1/2.  */
2263         slli    a6, a6, 1
2264         beqz    a6, .Lfloatdidf_exactlyhalf
2265 2:      leaf_return
2266
2267 .Lfloatdidf_bigshift:
2268         /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
2269         do_nsau a4, xl, a5, a6
2270         ssl     a4
2271         sll     xh, xl
2272         movi    xl, 0
2273         addi    a4, a4, 32
2274         j       .Lfloatdidf_shifted
2275
2276 .Lfloatdidf_exactlyhalf:
2277         /* Round down to the nearest even value.  */
2278         srli    xl, xl, 1
2279         slli    xl, xl, 1
2280         leaf_return
2281
2282 .Lfloatdidf_roundcarry:
2283         /* xl is always zero when the rounding increment overflows, so
2284            there's no need to round it to an even value.  */
2285         addi    xh, xh, 1
2286         /* Overflow to the exponent is OK.  */
2287         leaf_return
2288
2289 #endif /* L_floatdidf */
2290
2291 #ifdef L_truncdfsf2
2292
2293         .align  4
2294         .global __truncdfsf2
2295         .type   __truncdfsf2, @function
2296 __truncdfsf2:
2297         leaf_entry sp, 16
2298
2299         /* Adjust the exponent bias.  */
2300         movi    a4, (0x3ff - 0x7f) << 20
2301         sub     a5, xh, a4
2302
2303         /* Check for underflow.  */
2304         xor     a6, xh, a5
2305         bltz    a6, .Ltrunc_underflow
2306         extui   a6, a5, 20, 11
2307         beqz    a6, .Ltrunc_underflow
2308
2309         /* Check for overflow.  */
2310         movi    a4, 255
2311         bge     a6, a4, .Ltrunc_overflow
2312
2313         /* Shift a5/xl << 3 into a5/a4.  */
2314         ssai    (32 - 3)
2315         src     a5, a5, xl
2316         sll     a4, xl
2317
2318 .Ltrunc_addsign:
2319         /* Add the sign bit.  */
2320         extui   a6, xh, 31, 1
2321         slli    a6, a6, 31
2322         or      a2, a6, a5
2323
2324         /* Round up if the leftover fraction is >= 1/2.  */
2325         bgez    a4, 1f
2326         addi    a2, a2, 1
2327         /* Overflow to the exponent is OK.  The answer will be correct.  */
2328
2329         /* Check if the leftover fraction is exactly 1/2.  */
2330         slli    a4, a4, 1
2331         beqz    a4, .Ltrunc_exactlyhalf
2332 1:      leaf_return
2333
2334 .Ltrunc_exactlyhalf:
2335         /* Round down to the nearest even value.  */
2336         srli    a2, a2, 1
2337         slli    a2, a2, 1
2338         leaf_return
2339
2340 .Ltrunc_overflow:
2341         /* Check if exponent == 0x7ff.  */
2342         movi    a4, 0x7ff00000
2343         bnall   xh, a4, 1f
2344
2345         /* Check if mantissa is nonzero.  */
2346         slli    a5, xh, 12
2347         or      a5, a5, xl
2348         beqz    a5, 1f
2349
2350         /* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
2351         srli    a4, a4, 1
2352
2353 1:      slli    a4, a4, 4       /* 0xff000000 or 0xff800000 */
2354         /* Add the sign bit.  */
2355         extui   a6, xh, 31, 1
2356         ssai    1
2357         src     a2, a6, a4
2358         leaf_return
2359
2360 .Ltrunc_underflow:
2361         /* Find shift count for a subnormal.  Flush to zero if >= 32.  */
2362         extui   a6, xh, 20, 11
2363         movi    a5, 0x3ff - 0x7f
2364         sub     a6, a5, a6
2365         addi    a6, a6, 1
2366         bgeui   a6, 32, 1f
2367
2368         /* Replace the exponent with an explicit "1.0".  */
2369         slli    a5, a5, 13      /* 0x700000 */
2370         or      a5, a5, xh
2371         slli    a5, a5, 11
2372         srli    a5, a5, 11
2373
2374         /* Shift the mantissa left by 3 bits (into a5/a4).  */
2375         ssai    (32 - 3)
2376         src     a5, a5, xl
2377         sll     a4, xl
2378
2379         /* Shift right by a6.  */
2380         ssr     a6
2381         sll     a7, a4
2382         src     a4, a5, a4
2383         srl     a5, a5
2384         beqz    a7, .Ltrunc_addsign
2385         or      a4, a4, a6      /* any positive, nonzero value will work */
2386         j       .Ltrunc_addsign
2387
2388         /* Return +/- zero.  */
2389 1:      extui   a2, xh, 31, 1
2390         slli    a2, a2, 31
2391         leaf_return
2392
2393 #endif /* L_truncdfsf2 */
2394
2395 #ifdef L_extendsfdf2
2396
2397         .align  4
2398         .global __extendsfdf2
2399         .type   __extendsfdf2, @function
2400 __extendsfdf2:
2401         leaf_entry sp, 16
2402
2403         /* Save the sign bit and then shift it off.  */
2404         extui   a5, a2, 31, 1
2405         slli    a5, a5, 31
2406         slli    a4, a2, 1
2407
2408         /* Extract and check the exponent.  */
2409         extui   a6, a2, 23, 8
2410         beqz    a6, .Lextend_expzero
2411         addi    a6, a6, 1
2412         beqi    a6, 256, .Lextend_nan_or_inf
2413
2414         /* Shift >> 3 into a4/xl.  */
2415         srli    a4, a4, 4
2416         slli    xl, a2, (32 - 3)
2417
2418         /* Adjust the exponent bias.  */
2419         movi    a6, (0x3ff - 0x7f) << 20
2420         add     a4, a4, a6
2421
2422         /* Add the sign bit.  */
2423         or      xh, a4, a5
2424         leaf_return
2425
2426 .Lextend_nan_or_inf:
2427         movi    a4, 0x7ff00000
2428
2429         /* Check for NaN.  */
2430         slli    a7, a2, 9
2431         beqz    a7, 1f
2432
2433         slli    a6, a6, 11      /* 0x80000 */
2434         or      a4, a4, a6
2435
2436         /* Add the sign and return.  */
2437 1:      or      xh, a4, a5
2438         movi    xl, 0
2439         leaf_return
2440
2441 .Lextend_expzero:
2442         beqz    a4, 1b
2443
2444         /* Normalize it to have 8 zero bits before the first 1 bit.  */
2445         do_nsau a7, a4, a2, a3
2446         addi    a7, a7, -8
2447         ssl     a7
2448         sll     a4, a4
2449
2450         /* Shift >> 3 into a4/xl.  */
2451         slli    xl, a4, (32 - 3)
2452         srli    a4, a4, 3
2453
2454         /* Set the exponent.  */
2455         movi    a6, 0x3fe - 0x7f
2456         sub     a6, a6, a7
2457         slli    a6, a6, 20
2458         add     a4, a4, a6
2459
2460         /* Add the sign and return.  */
2461         or      xh, a4, a5
2462         leaf_return
2463
2464 #endif /* L_extendsfdf2 */
2465
2466
2467 #if XCHAL_HAVE_DFP_SQRT
2468 #ifdef L_sqrt
2469
2470         .text
2471         .align 4
2472         .global __ieee754_sqrt
2473         .type   __ieee754_sqrt, @function
2474 __ieee754_sqrt:
2475         leaf_entry      sp, 16
2476
2477         wfrd            f1, xh, xl
2478
2479         sqrt0.d         f2, f1
2480         const.d         f4, 0
2481         maddn.d         f4, f2, f2
2482         nexp01.d        f3, f1
2483         const.d         f0, 3
2484         addexp.d        f3, f0
2485         maddn.d         f0, f4, f3
2486         nexp01.d        f4, f1
2487         maddn.d         f2, f0, f2
2488         const.d         f5, 0
2489         maddn.d         f5, f2, f3
2490         const.d         f0, 3
2491         maddn.d         f0, f5, f2
2492         neg.d           f6, f4
2493         maddn.d         f2, f0, f2
2494         const.d         f0, 0
2495         const.d         f5, 0
2496         const.d         f7, 0
2497         maddn.d         f0, f6, f2
2498         maddn.d         f5, f2, f3
2499         const.d         f3, 3
2500         maddn.d         f7, f3, f2
2501         maddn.d         f4, f0, f0
2502         maddn.d         f3, f5, f2
2503         neg.d           f2, f7
2504         maddn.d         f0, f4, f2
2505         maddn.d         f7, f3, f7
2506         mksadj.d        f2, f1
2507         nexp01.d        f1, f1
2508         maddn.d         f1, f0, f0
2509         neg.d           f3, f7
2510         addexpm.d       f0, f2
2511         addexp.d        f3, f2
2512         divn.d          f0, f1, f3
2513
2514         rfr             xl, f0
2515         rfrd            xh, f0
2516
2517         leaf_return
2518
2519 #endif /* L_sqrt */
2520 #endif /* XCHAL_HAVE_DFP_SQRT */
2521
2522 #if XCHAL_HAVE_DFP_RECIP
2523 #ifdef L_recipdf2
2524         /* Reciprocal */
2525
2526         .align  4
2527         .global __recipdf2
2528         .type   __recipdf2, @function
2529 __recipdf2:
2530         leaf_entry      sp, 16
2531
2532         wfrd            f1, xh, xl
2533
2534         recip0.d        f0, f1
2535         const.d         f2, 2
2536         msub.d          f2, f1, f0
2537         mul.d           f3, f1, f0
2538         const.d         f4, 2
2539         mul.d           f5, f0, f2
2540         msub.d          f4, f3, f2
2541         const.d         f2, 1
2542         mul.d           f0, f5, f4
2543         msub.d          f2, f1, f0
2544         maddn.d         f0, f0, f2
2545
2546         rfr             xl, f0
2547         rfrd            xh, f0
2548
2549         leaf_return
2550
2551 #endif /* L_recipdf2 */
2552 #endif /* XCHAL_HAVE_DFP_RECIP */
2553
2554 #if XCHAL_HAVE_DFP_RSQRT
2555 #ifdef L_rsqrtdf2
2556         /* Reciprocal square root */
2557
2558         .align  4
2559         .global __rsqrtdf2
2560         .type   __rsqrtdf2, @function
2561 __rsqrtdf2:
2562         leaf_entry      sp, 16
2563
2564         wfrd            f1, xh, xl
2565
2566         rsqrt0.d        f0, f1
2567         mul.d           f2, f1, f0
2568         const.d         f3, 3
2569         mul.d           f4, f3, f0
2570         const.d         f5, 1
2571         msub.d          f5, f2, f0
2572         maddn.d         f0, f4, f5
2573         const.d         f2, 1
2574         mul.d           f4, f1, f0
2575         mul.d           f5, f3, f0
2576         msub.d          f2, f4, f0
2577         maddn.d         f0, f5, f2
2578         const.d         f2, 1
2579         mul.d           f1, f1, f0
2580         mul.d           f3, f3, f0
2581         msub.d          f2, f1, f0
2582         maddn.d         f0, f3, f2
2583
2584         rfr             xl, f0
2585         rfrd            xh, f0
2586
2587         leaf_return
2588
2589 #endif /* L_rsqrtdf2 */
2590 #endif /* XCHAL_HAVE_DFP_RSQRT */