1 /* IEEE-754 single-precision functions for Xtensa
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
38 /* Warning! The branch displacements for some Xtensa branch instructions
39 are quite small, and this code has been carefully laid out to keep
40 branch targets in range. If you change anything, be sure to check that
41 the assembler is not relaxing anything to branch over a jump. */
47 .type __negsf2, @function
61 /* Handle NaNs and Infinities. (This code is placed before the
62 start of the function just to keep it in range of the limited
63 branch displacements.) */
66 /* If y is neither Infinity nor NaN, return x. */
67 bnall a3, a6, .Ladd_return_nan_or_inf
68 /* If x is a NaN, return it. Otherwise, return y. */
70 bnez a7, .Ladd_return_nan
76 .Ladd_return_nan_or_inf:
78 bnez a7, .Ladd_return_nan
82 movi a6, 0x400000 /* make it a quiet NaN */
87 /* Operand signs differ. Do a subtraction. */
94 .type __addsf3, @function
99 /* Check if the two operands have the same sign. */
101 bltz a7, .Ladd_opposite_signs
104 /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
105 ball a2, a6, .Ladd_xnan_or_inf
106 ball a3, a6, .Ladd_ynan_or_inf
108 /* Compare the exponents. The smaller operand will be shifted
109 right by the exponent difference and added to the larger one.  */
113 bltu a7, a8, .Ladd_shiftx
116 /* Check if the smaller (or equal) exponent is zero. */
117 bnone a3, a6, .Ladd_yexpzero
119 /* Replace y sign/exponent with 0x008. */
125 /* Compute the exponent difference. */
128 /* Exponent difference > 32 -- just return the bigger value. */
131 /* Shift y right by the exponent difference. Any bits that are
132 shifted out of y are saved in a9 for rounding the result. */
138 /* Do the addition. */
141 /* Check if the add overflowed into the exponent. */
143 beq a10, a7, .Ladd_round
148 /* y is a subnormal value. Replace its sign/exponent with zero,
149 i.e., no implicit "1.0", and increment the apparent exponent
150 because subnormals behave as if they had the minimum (nonzero)
151 exponent. Test for the case when both exponents are zero. */
154 bnone a2, a6, .Ladd_bothexpzero
159 /* Both exponents are zero. Handle this as a special case. There
160 is no need to shift or round, and the normal code for handling
161 a carry into the exponent field will not work because it
162 assumes there is an implicit "1.0" that needs to be added. */
167 /* Same as "yexpzero" except skip handling the case when both
168 exponents are zero. */
175 /* Same thing as the "shifty" code, but with x and y swapped. Also,
176 because the exponent difference is always nonzero in this version,
177 the shift sequence can use SLL and skip loading a constant zero. */
178 bnone a2, a6, .Ladd_xexpzero
186 bgeui a10, 32, .Ladd_returny
194 /* Check if the add overflowed into the exponent. */
196 bne a10, a8, .Ladd_carry
199 /* Round up if the leftover fraction is >= 1/2. */
203 /* Check if the leftover fraction is exactly 1/2. */
205 beqz a9, .Ladd_exactlyhalf
213 /* The addition has overflowed into the exponent field, so the
214 value needs to be renormalized. The mantissa of the result
215 can be recovered by subtracting the original exponent and
216 adding 0x800000 (which is the explicit "1.0" for the
217 mantissa of the non-shifted operand -- the "1.0" for the
218 shifted operand was already added). The mantissa can then
219 be shifted right by one bit. The explicit "1.0" of the
220 shifted mantissa then needs to be replaced by the exponent,
221 incremented by one to account for the normalizing shift.
222 It is faster to combine these operations: do the shift first
223 and combine the additions and subtractions. If x is the
224 original exponent, the result is:
225 shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
226 which is equivalent to:
227 shifted mantissa + ((x + 1) << 22)
228 Note that the exponent is incremented here by leaving the
229 explicit "1.0" of the mantissa in the exponent field. */
231 /* Shift x right by one bit. Save the lsb. */
235 /* See explanation above. The original exponent is in a8. */
240 /* Return an Infinity if the exponent overflowed. */
241 ball a2, a6, .Ladd_infinity
243 /* Same thing as the "round" code except the msb of the leftover
244 fraction is bit 0 of a10, with the rest of the fraction in a9. */
247 beqz a9, .Ladd_exactlyhalf
251 /* Clear the mantissa. */
255 /* The sign bit may have been lost in a carry-out. Put it back. */
261 /* Round down to the nearest even value. */
270 /* Handle NaNs and Infinities. (This code is placed before the
271 start of the function just to keep it in range of the limited
272 branch displacements.) */
275 /* If y is neither Infinity nor NaN, return x. */
276 bnall a3, a6, .Lsub_return_nan_or_inf
277 /* Both x and y are either NaN or Inf, so the result is NaN. */
280 movi a4, 0x400000 /* make it a quiet NaN */
285 /* Negate y and return it. */
289 .Lsub_return_nan_or_inf:
291 bnez a7, .Lsub_return_nan
294 .Lsub_opposite_signs:
295 /* Operand signs differ. Do an addition. */
302 .type __subsf3, @function
307 /* Check if the two operands have the same sign. */
309 bltz a7, .Lsub_opposite_signs
312 /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
313 ball a2, a6, .Lsub_xnan_or_inf
314 ball a3, a6, .Lsub_ynan_or_inf
316 /* Compare the operands. In contrast to addition, the entire
317 value matters here. */
320 bltu a2, a3, .Lsub_xsmaller
323 /* Check if the smaller (or equal) exponent is zero. */
324 bnone a3, a6, .Lsub_yexpzero
326 /* Replace y sign/exponent with 0x008. */
332 /* Compute the exponent difference. */
335 /* Exponent difference > 32 -- just return the bigger value. */
338 /* Shift y right by the exponent difference. Any bits that are
339 shifted out of y are saved in a9 for rounding the result. */
347 /* Subtract the leftover bits in a9 from zero and propagate any
resulting borrow into the mantissa subtraction.  */
353 /* Check if the subtract underflowed into the exponent. */
355 beq a10, a7, .Lsub_round
359 /* Return zero if the inputs are equal. (For the non-subnormal
360 case, subtracting the "1.0" will cause a borrow from the exponent
361 and this case can be detected when handling the borrow.) */
362 beq a2, a3, .Lsub_return_zero
364 /* y is a subnormal value. Replace its sign/exponent with zero,
365 i.e., no implicit "1.0". Unless x is also a subnormal, increment
366 y's apparent exponent because subnormals behave as if they had
367 the minimum (nonzero) exponent. */
370 bnone a2, a6, .Lsub_yexpdiff
375 /* Negate and return y. */
381 /* Same thing as the "ysmaller" code, but with x and y swapped and
with the result negated, since x - y = -(y - x) when y is larger.  */
383 bnone a2, a6, .Lsub_xexpzero
391 bgeui a10, 32, .Lsub_returny
408 /* Check if the subtract underflowed into the exponent. */
410 bne a10, a8, .Lsub_borrow
413 /* Round up if the leftover fraction is >= 1/2. */
417 /* Check if the leftover fraction is exactly 1/2. */
419 beqz a9, .Lsub_exactlyhalf
423 /* Same as "yexpzero". */
424 beq a2, a3, .Lsub_return_zero
427 bnone a3, a6, .Lsub_xexpdiff
436 /* The subtraction has underflowed into the exponent field, so the
437 value needs to be renormalized. Shift the mantissa left as
438 needed to remove any leading zeros and adjust the exponent
439 accordingly. If the exponent is not large enough to remove
440 all the leading zeros, the result will be a subnormal value. */
444 do_nsau a6, a8, a7, a11
446 bge a6, a10, .Lsub_subnormal
449 .Lsub_normalize_shift:
450 /* Shift the mantissa (a8/a9) left by a6. */
455 /* Combine the shifted mantissa with the sign and exponent,
456 decrementing the exponent by a6. (The exponent has already
457 been decremented by one due to the borrow from the subtraction,
458 but adding the mantissa will increment the exponent by one.) */
466 /* Round down to the nearest even value. */
472 /* If there was a borrow from the exponent, and the mantissa and
473 guard digits are all zero, then the inputs were equal and the
474 result should be zero. */
475 beqz a9, .Lsub_return_zero
477 /* Only the guard digit is nonzero. Shift by min(24, a10). */
481 j .Lsub_normalize_shift
484 /* The exponent is too small to shift away all the leading zeros.
485 Set a6 to the current exponent (which has already been
486 decremented by the borrow) so that the exponent of the result
487 will be zero. Do not add 1 to a6 in this case, because: (1)
488 adding the mantissa will not increment the exponent, so there is
489 no need to subtract anything extra from the exponent to
490 compensate, and (2) the effective exponent of a subnormal is 1
491 not 0 so the shift amount must be 1 smaller than normal. */
493 j .Lsub_normalize_shift
495 #endif /* L_addsubsf3 */
500 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
501 #define XCHAL_NO_MUL 1
507 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
508 (This code is placed before the start of the function just to
509 keep it in range of the limited branch displacements.) */
512 /* Clear the sign bit of x. */
516 /* If x is zero, return zero. */
517 beqz a2, .Lmul_return_zero
519 /* Normalize x. Adjust the exponent in a8. */
520 do_nsau a10, a2, a11, a12
529 /* Clear the sign bit of y. */
533 /* If y is zero, return zero. */
534 beqz a3, .Lmul_return_zero
536 /* Normalize y. Adjust the exponent in a9. */
537 do_nsau a10, a3, a11, a12
546 /* Return zero with the appropriate sign bit. */
552 /* If y is zero, return NaN. */
554 beqz a8, .Lmul_return_nan
555 /* If y is NaN, return y. */
556 bnall a3, a6, .Lmul_returnx
558 beqz a8, .Lmul_returnx
565 bnez a8, .Lmul_return_nan
566 /* Set the sign bit and return. */
574 /* If x is zero, return NaN. */
576 bnez a8, .Lmul_returny
580 movi a4, 0x400000 /* make it a quiet NaN */
586 .type __mulsf3, @function
588 #if __XTENSA_CALL0_ABI__
596 /* This is not really a leaf function; allocate enough stack space
597 to allow CALL12s to a helper function. */
604 /* Get the sign of the result. */
607 /* Check for NaN and infinity. */
608 ball a2, a6, .Lmul_xnan_or_inf
609 ball a3, a6, .Lmul_ynan_or_inf
611 /* Extract the exponents. */
615 beqz a8, .Lmul_xexpzero
617 beqz a9, .Lmul_yexpzero
620 /* Add the exponents. */
623 /* Replace sign/exponent fields with explicit "1.0". */
630 /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
632 #if XCHAL_HAVE_MUL32_HIGH
639 /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
640 products. These partial products are:
649 If using the Mul16 or Mul32 multiplier options, these input
650 chunks must be stored in separate registers. For Mac16, the
651 UMUL.AA.* opcodes can specify that the inputs come from either
652 half of the registers, so there is no need to shift them out
653 ahead of time. If there is no multiply hardware, the 16-bit
654 chunks can be extracted when setting up the arguments to the
655 separate multiply function. */
657 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
658 /* Calling a separate multiply function will clobber a0 and requires
659 use of a8 as a temporary, so save those values now. (The function
660 uses a custom ABI so nothing else needs to be saved.) */
665 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
670 /* Get the high halves of the inputs into registers. */
677 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
678 /* Clear the high halves of the inputs. This does not matter
679 for MUL16 because the high bits are ignored. */
683 #endif /* MUL16 || MUL32 */
688 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
689 mul16u dst, xreg ## xhalf, yreg ## yhalf
691 #elif XCHAL_HAVE_MUL32
693 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
694 mull dst, xreg ## xhalf, yreg ## yhalf
696 #elif XCHAL_HAVE_MAC16
698 /* The preprocessor insists on inserting a space when concatenating after
699 a period in the definition of do_mul below. These macros are a workaround
700 using underscores instead of periods when doing the concatenation. */
701 #define umul_aa_ll umul.aa.ll
702 #define umul_aa_lh umul.aa.lh
703 #define umul_aa_hl umul.aa.hl
704 #define umul_aa_hh umul.aa.hh
706 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
707 umul_aa_ ## xhalf ## yhalf xreg, yreg; \
710 #else /* no multiply hardware */
712 #define set_arg_l(dst, src) \
713 extui dst, src, 0, 16
714 #define set_arg_h(dst, src) \
717 #if __XTENSA_CALL0_ABI__
718 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
719 set_arg_ ## xhalf (a13, xreg); \
720 set_arg_ ## yhalf (a14, yreg); \
721 call0 .Lmul_mulsi3; \
724 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
725 set_arg_ ## xhalf (a14, xreg); \
726 set_arg_ ## yhalf (a15, yreg); \
727 call12 .Lmul_mulsi3; \
729 #endif /* __XTENSA_CALL0_ABI__ */
731 #endif /* no multiply hardware */
733 /* Add pp1 and pp2 into a6 with carry-out in a9. */
734 do_mul(a6, a2, l, a3, h) /* pp 1 */
735 do_mul(a11, a2, h, a3, l) /* pp 2 */
741 /* Shift the high half of a9/a6 into position in a9. Note that
742 this value can be safely incremented without any carry-outs. */
746 /* Compute the low word into a6. */
747 do_mul(a11, a2, l, a3, l) /* pp 0 */
753 /* Compute the high word into a2. */
754 do_mul(a2, a2, h, a3, h) /* pp 3 */
757 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
758 /* Restore values saved on the stack during the multiplication. */
762 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
764 /* Shift left by 9 bits, unless there was a carry-out from the
765 multiply, in which case, shift by 8 bits and increment the
exponent.  */
776 /* Subtract the extra bias from the exponent sum (plus one to account
777 for the explicit "1.0" of the mantissa that will be added to the
778 exponent in the final result). */
782 /* Check for over/underflow. The value in a8 is one less than the
783 final exponent, so values in the range 0..fd are OK here. */
785 bgeu a8, a4, .Lmul_overflow
789 bgez a6, .Lmul_rounded
792 beqz a6, .Lmul_exactlyhalf
795 /* Add the exponent to the mantissa. */
800 /* Add the sign bit. */
806 #if __XTENSA_CALL0_ABI__
816 /* Round down to the nearest even value. */
822 bltz a8, .Lmul_underflow
823 /* Return +/- Infinity. */
829 /* Create a subnormal value, where the exponent field contains zero,
830 but the effective exponent is 1. The value of a8 is one less than
831 the actual exponent, so just negate it to get the shift amount. */
835 bgeui a8, 32, .Lmul_flush_to_zero
837 /* Shift a2 right. Any bits that are shifted out of a2 are saved
838 in a6 (combined with the shifted-out bits currently in a6) for
839 rounding the result. */
843 /* Set the exponent to zero. */
846 /* Pack any nonzero bits shifted out into a6. */
853 /* Return zero with the appropriate sign bit. */
860 /* For Xtensa processors with no multiply hardware, this simplified
861 version of _mulsi3 is used for multiplying 16-bit chunks of
862 the floating-point mantissas. When using CALL0, this function
863 uses a custom ABI: the inputs are passed in a13 and a14, the
864 result is returned in a12, and a8 and a15 are clobbered. */
868 .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
870 1: add \tmp1, \src2, \dst
871 extui \tmp2, \src1, 0, 1
872 movnez \dst, \tmp1, \tmp2
874 do_addx2 \tmp1, \src2, \dst, \tmp1
875 extui \tmp2, \src1, 1, 1
876 movnez \dst, \tmp1, \tmp2
878 do_addx4 \tmp1, \src2, \dst, \tmp1
879 extui \tmp2, \src1, 2, 1
880 movnez \dst, \tmp1, \tmp2
882 do_addx8 \tmp1, \src2, \dst, \tmp1
883 extui \tmp2, \src1, 3, 1
884 movnez \dst, \tmp1, \tmp2
890 #if __XTENSA_CALL0_ABI__
891 mul_mulsi3_body a12, a13, a14, a15, a8
893 /* The result will be written into a2, so save that argument in a4. */
895 mul_mulsi3_body a2, a4, a3, a5, a6
898 #endif /* XCHAL_NO_MUL */
899 #endif /* L_mulsf3 */
905 #if XCHAL_HAVE_FP_DIV
909 .type __divsf3, @function
913 wfr f1, a2 /* dividend */
914 wfr f2, a3 /* divisor */
951 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
952 (This code is placed before the start of the function just to
953 keep it in range of the limited branch displacements.) */
956 /* Clear the sign bit of y. */
960 /* Check for division by zero. */
963 /* Normalize y. Adjust the exponent in a9. */
964 do_nsau a10, a3, a4, a5
973 /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
980 movi a4, 0x400000 /* make it a quiet NaN */
985 /* Clear the sign bit of x. */
989 /* If x is zero, return zero. */
990 beqz a2, .Ldiv_return_zero
992 /* Normalize x. Adjust the exponent in a8. */
993 do_nsau a10, a2, a4, a5
1002 /* Return zero with the appropriate sign bit. */
1008 /* Set the sign bit of the result. */
1012 /* If y is NaN or Inf, return NaN. */
1013 ball a3, a6, .Ldiv_return_nan
1015 bnez a7, .Ldiv_return_nan
1019 /* If y is Infinity, return zero. */
1021 beqz a8, .Ldiv_return_zero
1022 /* y is NaN; return it. */
1026 movi a4, 0x400000 /* make it a quiet NaN */
1032 .type __divsf3, @function
1037 /* Get the sign of the result. */
1040 /* Check for NaN and infinity. */
1041 ball a2, a6, .Ldiv_xnan_or_inf
1042 ball a3, a6, .Ldiv_ynan_or_inf
1044 /* Extract the exponents. */
1048 beqz a9, .Ldiv_yexpzero
1050 beqz a8, .Ldiv_xexpzero
1053 /* Subtract the exponents. */
1056 /* Replace sign/exponent fields with explicit "1.0". */
1063 /* The first digit of the mantissa division must be a one.
1064 Shift x (and adjust the exponent) as needed to make this true. */
1069 /* Do the first subtraction and shift. */
1073 /* Put the quotient into a10. */
1076 /* Divide one bit at a time for 23 bits. */
1078 #if XCHAL_HAVE_LOOPS
1079 loop a9, .Ldiv_loopend
1082 /* Shift the quotient << 1. */
1085 /* Is this digit a 0 or 1? */
1088 /* Output a 1 and subtract. */
1092 /* Shift the dividend << 1. */
1095 #if !XCHAL_HAVE_LOOPS
1101 /* Add the exponent bias (less one to account for the explicit "1.0"
1102 of the mantissa that will be added to the exponent in the final
result).  */
1106 /* Check for over/underflow. The value in a8 is one less than the
1107 final exponent, so values in the range 0..fd are OK here. */
1109 bgeu a8, a4, .Ldiv_overflow
1112 /* Round. The remainder (<< 1) is in a2. */
1113 bltu a2, a3, .Ldiv_rounded
1115 beq a2, a3, .Ldiv_exactlyhalf
1118 /* Add the exponent to the mantissa. */
1123 /* Add the sign bit. */
1130 bltz a8, .Ldiv_underflow
1131 /* Return +/- Infinity. */
1132 addi a8, a4, 1 /* 0xff */
1137 /* Remainder is exactly half the divisor. Round even. */
1143 /* Create a subnormal value, where the exponent field contains zero,
1144 but the effective exponent is 1. The value of a8 is one less than
1145 the actual exponent, so just negate it to get the shift amount. */
1148 bgeui a8, 32, .Ldiv_flush_to_zero
1150 /* Shift a10 right. Any bits that are shifted out of a10 are
1151 saved in a6 for rounding the result. */
1155 /* Set the exponent to zero. */
1158 /* Pack any nonzero remainder (in a2) into a6. */
1163 /* Round a10 based on the bits shifted out into a6. */
1164 1: bgez a6, .Ldiv_rounded
1167 bnez a6, .Ldiv_rounded
1172 .Ldiv_flush_to_zero:
1173 /* Return zero with the appropriate sign bit. */
1178 #endif /* XCHAL_HAVE_FP_DIV */
1180 #endif /* L_divsf3 */
1184 /* Equal and Not Equal */
1189 .set __nesf2, __eqsf2
1190 .type __eqsf2, @function
1195 /* The values are equal but NaN != NaN. Check the exponent. */
1207 /* Check if the mantissas are nonzero. */
1211 /* Check if x and y are zero with different signs. */
1215 /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1216 or x when exponent(x) = 0x7f8 and x == y. */
1227 .type __gtsf2, @function
1232 1: bnall a3, a6, .Lle_cmp
1234 /* Check if y is a NaN. */
1240 /* Check if x is a NaN. */
1247 /* Less Than or Equal */
1251 .type __lesf2, @function
1256 1: bnall a3, a6, .Lle_cmp
1258 /* Check if y is a NaN. */
1264 /* Check if x is a NaN. */
1271 /* Check if x and y have different signs. */
1273 bltz a7, .Lle_diff_signs
1275 /* Check if x is negative. */
1278 /* Check if x <= y. */
1284 /* Check if y <= x. */
1292 /* Check if both x and y are zero. */
1301 /* Greater Than or Equal */
1305 .type __gesf2, @function
1310 1: bnall a3, a6, .Llt_cmp
1312 /* Check if y is a NaN. */
1318 /* Check if x is a NaN. */
1329 .type __ltsf2, @function
1334 1: bnall a3, a6, .Llt_cmp
1336 /* Check if y is a NaN. */
1342 /* Check if x is a NaN. */
1349 /* Check if x and y have different signs. */
1351 bltz a7, .Llt_diff_signs
1353 /* Check if x is negative. */
1356 /* Check if x < y. */
1362 /* Check if y < x. */
1370 /* Check if both x and y are nonzero. */
1383 .type __unordsf2, @function
1402 #endif /* L_cmpsf2 */
1408 .type __fixsfsi, @function
1412 /* Check for NaN and Infinity. */
1414 ball a2, a6, .Lfixsfsi_nan_or_inf
1416 /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
1419 bgei a4, 32, .Lfixsfsi_maxint
1420 blti a4, 1, .Lfixsfsi_zero
1422 /* Add explicit "1.0" and shift << 8. */
1426 /* Shift back to the right, based on the exponent. */
1427 ssl a4 /* shift by 32 - a4 */
1430 /* Negate the result if sign != 0. */
1435 .Lfixsfsi_nan_or_inf:
1436 /* Handle Infinity and NaN. */
1438 beqz a4, .Lfixsfsi_maxint
1440 /* Translate NaN to +maxint. */
1444 slli a4, a6, 8 /* 0x80000000 */
1445 addi a5, a4, -1 /* 0x7fffffff */
1454 #endif /* L_fixsfsi */
1460 .type __fixsfdi, @function
1464 /* Check for NaN and Infinity. */
1466 ball a2, a6, .Lfixsfdi_nan_or_inf
1468 /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
1471 bgei a4, 64, .Lfixsfdi_maxint
1472 blti a4, 1, .Lfixsfdi_zero
1474 /* Add explicit "1.0" and shift << 8. */
1478 /* Shift back to the right, based on the exponent. */
1479 ssl a4 /* shift by 64 - a4 */
1480 bgei a4, 32, .Lfixsfdi_smallshift
1485 /* Negate the result if sign != 0. */
1493 .Lfixsfdi_smallshift:
1499 .Lfixsfdi_nan_or_inf:
1500 /* Handle Infinity and NaN. */
1502 beqz a4, .Lfixsfdi_maxint
1504 /* Translate NaN to +maxint. */
1508 slli a7, a6, 8 /* 0x80000000 */
1514 1: addi xh, a7, -1 /* 0x7fffffff */
1523 #endif /* L_fixsfdi */
1528 .global __fixunssfsi
1529 .type __fixunssfsi, @function
1533 /* Check for NaN and Infinity. */
1535 ball a2, a6, .Lfixunssfsi_nan_or_inf
1537 /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
1540 bgei a4, 32, .Lfixunssfsi_maxint
1541 bltz a4, .Lfixunssfsi_zero
1543 /* Add explicit "1.0" and shift << 8. */
1547 /* Shift back to the right, based on the exponent. */
1549 beqi a4, 32, .Lfixunssfsi_bigexp
1550 ssl a4 /* shift by 32 - a4 */
1553 /* Negate the result if sign != 0. */
1558 .Lfixunssfsi_nan_or_inf:
1559 /* Handle Infinity and NaN. */
1561 beqz a4, .Lfixunssfsi_maxint
1563 /* Translate NaN to 0xffffffff. */
1567 .Lfixunssfsi_maxint:
1568 slli a4, a6, 8 /* 0x80000000 */
1569 movi a5, -1 /* 0xffffffff */
1578 .Lfixunssfsi_bigexp:
1579 /* Handle unsigned maximum exponent case. */
1581 mov a2, a5 /* no shift needed */
1584 /* Return 0x80000000 if negative. */
1588 #endif /* L_fixunssfsi */
1593 .global __fixunssfdi
1594 .type __fixunssfdi, @function
1598 /* Check for NaN and Infinity. */
1600 ball a2, a6, .Lfixunssfdi_nan_or_inf
1602 /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
1605 bgei a4, 64, .Lfixunssfdi_maxint
1606 bltz a4, .Lfixunssfdi_zero
1608 /* Add explicit "1.0" and shift << 8. */
1612 /* Shift back to the right, based on the exponent. */
1614 beqi a4, 64, .Lfixunssfdi_bigexp
1615 ssl a4 /* shift by 64 - a4 */
1616 bgei a4, 32, .Lfixunssfdi_smallshift
1620 .Lfixunssfdi_shifted:
1621 /* Negate the result if sign != 0. */
1629 .Lfixunssfdi_smallshift:
1633 j .Lfixunssfdi_shifted
1635 .Lfixunssfdi_nan_or_inf:
1636 /* Handle Infinity and NaN. */
1638 beqz a4, .Lfixunssfdi_maxint
1640 /* Translate NaN to 0xffffffff.... */
1645 .Lfixunssfdi_maxint:
1647 2: slli xh, a6, 8 /* 0x80000000 */
1656 .Lfixunssfdi_bigexp:
1657 /* Handle unsigned maximum exponent case. */
1660 leaf_return /* no shift needed */
1662 #endif /* L_fixunssfdi */
1667 .global __floatunsisf
1668 .type __floatunsisf, @function
1671 beqz a2, .Lfloatsisf_return
1673 /* Set the sign to zero and jump to the floatsisf code. */
1675 j .Lfloatsisf_normalize
1679 .type __floatsisf, @function
1683 /* Check for zero. */
1684 beqz a2, .Lfloatsisf_return
1686 /* Save the sign. */
1689 /* Get the absolute value. */
1697 .Lfloatsisf_normalize:
1698 /* Normalize with the first 1 bit in the msb. */
1699 do_nsau a4, a2, a5, a6
1703 /* Shift the mantissa into position, with rounding bits in a6. */
1705 slli a6, a5, (32 - 8)
1707 /* Set the exponent. */
1708 movi a5, 0x9d /* 0x7e + 31 */
1717 /* Round up if the leftover fraction is >= 1/2. */
1718 bgez a6, .Lfloatsisf_return
1719 addi a2, a2, 1 /* Overflow to the exponent is OK. */
1721 /* Check if the leftover fraction is exactly 1/2. */
1723 beqz a6, .Lfloatsisf_exactlyhalf
1728 .Lfloatsisf_exactlyhalf:
1729 /* Round down to the nearest even value. */
1734 #endif /* L_floatsisf */
1739 .global __floatundisf
1740 .type __floatundisf, @function
1744 /* Check for zero. */
1748 /* Set the sign to zero and jump to the floatdisf code. */
1750 j .Lfloatdisf_normalize
1754 .type __floatdisf, @function
1758 /* Check for zero. */
1762 /* Save the sign. */
1765 /* Get the absolute value. */
1766 bgez xh, .Lfloatdisf_normalize
1769 beqz xl, .Lfloatdisf_normalize
1772 .Lfloatdisf_normalize:
1773 /* Normalize with the first 1 bit in the msb of xh. */
1774 beqz xh, .Lfloatdisf_bigshift
1775 do_nsau a4, xh, a5, a6
1780 .Lfloatdisf_shifted:
1781 /* Shift the mantissa into position, with rounding bits in a6. */
1790 /* Set the exponent. */
1791 movi a5, 0xbd /* 0x7e + 63 */
1800 /* Round up if the leftover fraction is >= 1/2. */
1802 addi a2, a2, 1 /* Overflow to the exponent is OK. */
1804 /* Check if the leftover fraction is exactly 1/2. */
1806 beqz a6, .Lfloatdisf_exactlyhalf
1809 .Lfloatdisf_bigshift:
1810 /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
1811 do_nsau a4, xl, a5, a6
1816 j .Lfloatdisf_shifted
1818 .Lfloatdisf_exactlyhalf:
1819 /* Round down to the nearest even value. */
1824 #endif /* L_floatdisf */
1826 #if XCHAL_HAVE_FP_SQRT
1831 .global __ieee754_sqrtf
1832 .type __ieee754_sqrtf, @function
1872 #endif /* L_sqrtf */
1873 #endif /* XCHAL_HAVE_FP_SQRT */
1875 #if XCHAL_HAVE_FP_RECIP
1881 .type __recipsf2, @function
1899 #endif /* L_recipsf2 */
1900 #endif /* XCHAL_HAVE_FP_RECIP */
1902 #if XCHAL_HAVE_FP_RSQRT
1904 /* Reciprocal square root */
1908 .type __rsqrtsf2, @function
1931 #endif /* L_rsqrtsf2 */
1932 #endif /* XCHAL_HAVE_FP_RSQRT */