1 /* IEEE-754 single-precision functions for Xtensa
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
38 /* Warning! The branch displacements for some Xtensa branch instructions
39 are quite small, and this code has been carefully laid out to keep
40 branch targets in range. If you change anything, be sure to check that
41 the assembler is not relaxing anything to branch over a jump. */
/* __addsf3: IEEE-754 binary32 addition.  Operands arrive in a2 (x) and
   a3 (y), result returned in a2, consistent with the register uses below.
   NOTE(review): this is a sparse excerpt; the leading number on each line
   is the original file's line number and intermediate instructions are
   not visible.  a6 appears to hold the sign/exponent mask 0x7f800000
   ("exponent == 0x7f8" refers to the top 12 bits of the word) -- the
   constant load is not visible here; confirm against the full file.  */
47 .type __negsf2, @function
62 /* Handle NaNs and Infinities. (This code is placed before the
63 start of the function just to keep it in range of the limited
64 branch displacements.) */
67 /* If y is neither Infinity nor NaN, return x. */
68 bnall a3, a6, .Ladd_return_nan_or_inf
69 /* If x is a NaN, return it. Otherwise, return y. */
71 bnez a7, .Ladd_return_nan
77 .Ladd_return_nan_or_inf:
79 bnez a7, .Ladd_return_nan
83 movi a6, 0x400000 /* make it a quiet NaN */
88 /* Operand signs differ. Do a subtraction. */
95 .type __addsf3, @function
100 /* Check if the two operands have the same sign. */
102 bltz a7, .Ladd_opposite_signs
105 /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
106 ball a2, a6, .Ladd_xnan_or_inf
107 ball a3, a6, .Ladd_ynan_or_inf
109 /* Compare the exponents. The smaller operand will be shifted
110 right by the exponent difference and added to the larger
114 bltu a7, a8, .Ladd_shiftx
117 /* Check if the smaller (or equal) exponent is zero. */
118 bnone a3, a6, .Ladd_yexpzero
120 /* Replace y sign/exponent with 0x008. */
126 /* Compute the exponent difference. */
129 /* Exponent difference > 32 -- just return the bigger value. */
132 /* Shift y right by the exponent difference. Any bits that are
133 shifted out of y are saved in a9 for rounding the result. */
139 /* Do the addition. */
142 /* Check if the add overflowed into the exponent. */
144 beq a10, a7, .Ladd_round
149 /* y is a subnormal value. Replace its sign/exponent with zero,
150 i.e., no implicit "1.0", and increment the apparent exponent
151 because subnormals behave as if they had the minimum (nonzero)
152 exponent. Test for the case when both exponents are zero. */
155 bnone a2, a6, .Ladd_bothexpzero
160 /* Both exponents are zero. Handle this as a special case. There
161 is no need to shift or round, and the normal code for handling
162 a carry into the exponent field will not work because it
163 assumes there is an implicit "1.0" that needs to be added. */
168 /* Same as "yexpzero" except skip handling the case when both
169 exponents are zero. */
176 /* Same thing as the "shifty" code, but with x and y swapped. Also,
177 because the exponent difference is always nonzero in this version,
178 the shift sequence can use SLL and skip loading a constant zero. */
179 bnone a2, a6, .Ladd_xexpzero
187 bgeui a10, 32, .Ladd_returny
195 /* Check if the add overflowed into the exponent. */
197 bne a10, a8, .Ladd_carry
200 /* Round up if the leftover fraction is >= 1/2. */
204 /* Check if the leftover fraction is exactly 1/2. */
206 beqz a9, .Ladd_exactlyhalf
/* NOTE(review): the algebra below checks out:
   -(x << 22) + (1 << 22) + (x << 23) == ((x + 1) << 22).  */
214 /* The addition has overflowed into the exponent field, so the
215 value needs to be renormalized. The mantissa of the result
216 can be recovered by subtracting the original exponent and
217 adding 0x800000 (which is the explicit "1.0" for the
218 mantissa of the non-shifted operand -- the "1.0" for the
219 shifted operand was already added). The mantissa can then
220 be shifted right by one bit. The explicit "1.0" of the
221 shifted mantissa then needs to be replaced by the exponent,
222 incremented by one to account for the normalizing shift.
223 It is faster to combine these operations: do the shift first
224 and combine the additions and subtractions. If x is the
225 original exponent, the result is:
226 shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
228 shifted mantissa + ((x + 1) << 22)
229 Note that the exponent is incremented here by leaving the
230 explicit "1.0" of the mantissa in the exponent field. */
232 /* Shift x right by one bit. Save the lsb. */
236 /* See explanation above. The original exponent is in a8. */
241 /* Return an Infinity if the exponent overflowed. */
242 ball a2, a6, .Ladd_infinity
244 /* Same thing as the "round" code except the msb of the leftover
245 fraction is bit 0 of a10, with the rest of the fraction in a9. */
248 beqz a9, .Ladd_exactlyhalf
252 /* Clear the mantissa. */
256 /* The sign bit may have been lost in a carry-out. Put it back. */
262 /* Round down to the nearest even value. */
/* __subsf3: IEEE-754 binary32 subtraction (x in a2, y in a3, result in
   a2).  Unlike addition, magnitudes are compared on the full word
   (see "the entire value matters" below) so the smaller operand can be
   subtracted from the larger.  NOTE(review): sparse excerpt -- the
   leading number on each line is the original file's line number;
   intermediate instructions are not shown.  */
271 /* Handle NaNs and Infinities. (This code is placed before the
272 start of the function just to keep it in range of the limited
273 branch displacements.) */
276 /* If y is neither Infinity nor NaN, return x. */
277 bnall a3, a6, .Lsub_return_nan_or_inf
278 /* Both x and y are either NaN or Inf, so the result is NaN. */
281 movi a4, 0x400000 /* make it a quiet NaN */
286 /* Negate y and return it. */
290 .Lsub_return_nan_or_inf:
292 bnez a7, .Lsub_return_nan
295 .Lsub_opposite_signs:
296 /* Operand signs differ. Do an addition. */
303 .type __subsf3, @function
308 /* Check if the two operands have the same sign. */
310 bltz a7, .Lsub_opposite_signs
313 /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
314 ball a2, a6, .Lsub_xnan_or_inf
315 ball a3, a6, .Lsub_ynan_or_inf
317 /* Compare the operands. In contrast to addition, the entire
318 value matters here. */
321 bltu a2, a3, .Lsub_xsmaller
324 /* Check if the smaller (or equal) exponent is zero. */
325 bnone a3, a6, .Lsub_yexpzero
327 /* Replace y sign/exponent with 0x008. */
333 /* Compute the exponent difference. */
336 /* Exponent difference > 32 -- just return the bigger value. */
339 /* Shift y right by the exponent difference. Any bits that are
340 shifted out of y are saved in a9 for rounding the result. */
348 /* Subtract the leftover bits in a9 from zero and propagate any
354 /* Check if the subtract underflowed into the exponent. */
356 beq a10, a7, .Lsub_round
360 /* Return zero if the inputs are equal. (For the non-subnormal
361 case, subtracting the "1.0" will cause a borrow from the exponent
362 and this case can be detected when handling the borrow.) */
363 beq a2, a3, .Lsub_return_zero
365 /* y is a subnormal value. Replace its sign/exponent with zero,
366 i.e., no implicit "1.0". Unless x is also a subnormal, increment
367 y's apparent exponent because subnormals behave as if they had
368 the minimum (nonzero) exponent. */
371 bnone a2, a6, .Lsub_yexpdiff
376 /* Negate and return y. */
382 /* Same thing as the "ysmaller" code, but with x and y swapped and
384 bnone a2, a6, .Lsub_xexpzero
392 bgeui a10, 32, .Lsub_returny
409 /* Check if the subtract underflowed into the exponent. */
411 bne a10, a8, .Lsub_borrow
414 /* Round up if the leftover fraction is >= 1/2. */
418 /* Check if the leftover fraction is exactly 1/2. */
420 beqz a9, .Lsub_exactlyhalf
424 /* Same as "yexpzero". */
425 beq a2, a3, .Lsub_return_zero
428 bnone a3, a6, .Lsub_xexpdiff
437 /* The subtraction has underflowed into the exponent field, so the
438 value needs to be renormalized. Shift the mantissa left as
439 needed to remove any leading zeros and adjust the exponent
440 accordingly. If the exponent is not large enough to remove
441 all the leading zeros, the result will be a subnormal value. */
/* NOTE(review): do_nsau is presumably a macro computing the
   normalization shift amount (NSAU = "normalization shift amount,
   unsigned"); its definition is not visible in this excerpt.  */
445 do_nsau a6, a8, a7, a11
447 bge a6, a10, .Lsub_subnormal
450 .Lsub_normalize_shift:
451 /* Shift the mantissa (a8/a9) left by a6. */
456 /* Combine the shifted mantissa with the sign and exponent,
457 decrementing the exponent by a6. (The exponent has already
458 been decremented by one due to the borrow from the subtraction,
459 but adding the mantissa will increment the exponent by one.) */
467 /* Round down to the nearest even value. */
473 /* If there was a borrow from the exponent, and the mantissa and
474 guard digits are all zero, then the inputs were equal and the
475 result should be zero. */
476 beqz a9, .Lsub_return_zero
478 /* Only the guard digit is nonzero. Shift by min(24, a10). */
482 j .Lsub_normalize_shift
485 /* The exponent is too small to shift away all the leading zeros.
486 Set a6 to the current exponent (which has already been
487 decremented by the borrow) so that the exponent of the result
488 will be zero. Do not add 1 to a6 in this case, because: (1)
489 adding the mantissa will not increment the exponent, so there is
490 no need to subtract anything extra from the exponent to
491 compensate, and (2) the effective exponent of a subnormal is 1
492 not 0 so the shift amount must be 1 smaller than normal. */
494 j .Lsub_normalize_shift
496 #endif /* L_addsubsf3 */
/* __mulsf3: IEEE-754 binary32 multiplication (x in a2, y in a3, result
   in a2).  The 24x24-bit mantissa product is formed as a 32x32->64
   multiply, selected at build time: MUL32_HIGH, MUL16/MUL32, MAC16, or
   a software helper (.Lmul_mulsi3) when no multiply hardware exists.
   NOTE(review): sparse excerpt -- the leading number on each line is
   the original file's line number; intermediate instructions are not
   shown.  */
501 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
502 #define XCHAL_NO_MUL 1
508 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
509 (This code is placed before the start of the function just to
510 keep it in range of the limited branch displacements.) */
513 /* Clear the sign bit of x. */
517 /* If x is zero, return zero. */
518 beqz a2, .Lmul_return_zero
520 /* Normalize x. Adjust the exponent in a8. */
521 do_nsau a10, a2, a11, a12
530 /* Clear the sign bit of y. */
534 /* If y is zero, return zero. */
535 beqz a3, .Lmul_return_zero
537 /* Normalize y. Adjust the exponent in a9. */
538 do_nsau a10, a3, a11, a12
547 /* Return zero with the appropriate sign bit. */
553 /* If y is zero, return NaN. */
555 beqz a8, .Lmul_return_nan
556 /* If y is NaN, return y. */
557 bnall a3, a6, .Lmul_returnx
559 beqz a8, .Lmul_returnx
566 bnez a8, .Lmul_return_nan
567 /* Set the sign bit and return. */
575 /* If x is zero, return NaN. */
577 bnez a8, .Lmul_returny
581 movi a4, 0x400000 /* make it a quiet NaN */
587 .type __mulsf3, @function
589 #if __XTENSA_CALL0_ABI__
597 /* This is not really a leaf function; allocate enough stack space
598 to allow CALL12s to a helper function. */
605 /* Get the sign of the result. */
608 /* Check for NaN and infinity. */
609 ball a2, a6, .Lmul_xnan_or_inf
610 ball a3, a6, .Lmul_ynan_or_inf
612 /* Extract the exponents. */
616 beqz a8, .Lmul_xexpzero
618 beqz a9, .Lmul_yexpzero
621 /* Add the exponents. */
624 /* Replace sign/exponent fields with explicit "1.0". */
631 /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
633 #if XCHAL_HAVE_MUL32_HIGH
640 /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
641 products. These partial products are:
650 If using the Mul16 or Mul32 multiplier options, these input
651 chunks must be stored in separate registers. For Mac16, the
652 UMUL.AA.* opcodes can specify that the inputs come from either
653 half of the registers, so there is no need to shift them out
654 ahead of time. If there is no multiply hardware, the 16-bit
655 chunks can be extracted when setting up the arguments to the
656 separate multiply function. */
658 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
659 /* Calling a separate multiply function will clobber a0 and requires
660 use of a8 as a temporary, so save those values now. (The function
661 uses a custom ABI so nothing else needs to be saved.) */
666 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
671 /* Get the high halves of the inputs into registers. */
678 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
679 /* Clear the high halves of the inputs. This does not matter
680 for MUL16 because the high bits are ignored. */
684 #endif /* MUL16 || MUL32 */
/* do_mul(dst, xreg, xhalf, yreg, yhalf): one 16x16->32 partial
   product; the definition chosen depends on the multiply hardware
   configured for this Xtensa core.  */
689 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
690 mul16u dst, xreg ## xhalf, yreg ## yhalf
692 #elif XCHAL_HAVE_MUL32
694 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
695 mull dst, xreg ## xhalf, yreg ## yhalf
697 #elif XCHAL_HAVE_MAC16
699 /* The preprocessor insists on inserting a space when concatenating after
700 a period in the definition of do_mul below. These macros are a workaround
701 using underscores instead of periods when doing the concatenation. */
702 #define umul_aa_ll umul.aa.ll
703 #define umul_aa_lh umul.aa.lh
704 #define umul_aa_hl umul.aa.hl
705 #define umul_aa_hh umul.aa.hh
707 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
708 umul_aa_ ## xhalf ## yhalf xreg, yreg; \
711 #else /* no multiply hardware */
713 #define set_arg_l(dst, src) \
714 extui dst, src, 0, 16
715 #define set_arg_h(dst, src) \
718 #if __XTENSA_CALL0_ABI__
719 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
720 set_arg_ ## xhalf (a13, xreg); \
721 set_arg_ ## yhalf (a14, yreg); \
722 call0 .Lmul_mulsi3; \
725 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
726 set_arg_ ## xhalf (a14, xreg); \
727 set_arg_ ## yhalf (a15, yreg); \
728 call12 .Lmul_mulsi3; \
730 #endif /* __XTENSA_CALL0_ABI__ */
732 #endif /* no multiply hardware */
734 /* Add pp1 and pp2 into a6 with carry-out in a9. */
735 do_mul(a6, a2, l, a3, h) /* pp 1 */
736 do_mul(a11, a2, h, a3, l) /* pp 2 */
742 /* Shift the high half of a9/a6 into position in a9. Note that
743 this value can be safely incremented without any carry-outs. */
747 /* Compute the low word into a6. */
748 do_mul(a11, a2, l, a3, l) /* pp 0 */
754 /* Compute the high word into a2. */
755 do_mul(a2, a2, h, a3, h) /* pp 3 */
758 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
759 /* Restore values saved on the stack during the multiplication. */
763 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
765 /* Shift left by 9 bits, unless there was a carry-out from the
766 multiply, in which case, shift by 8 bits and increment the
777 /* Subtract the extra bias from the exponent sum (plus one to account
778 for the explicit "1.0" of the mantissa that will be added to the
779 exponent in the final result). */
783 /* Check for over/underflow. The value in a8 is one less than the
784 final exponent, so values in the range 0..fd are OK here. */
786 bgeu a8, a4, .Lmul_overflow
790 bgez a6, .Lmul_rounded
793 beqz a6, .Lmul_exactlyhalf
796 /* Add the exponent to the mantissa. */
801 /* Add the sign bit. */
807 #if __XTENSA_CALL0_ABI__
817 /* Round down to the nearest even value. */
823 bltz a8, .Lmul_underflow
824 /* Return +/- Infinity. */
830 /* Create a subnormal value, where the exponent field contains zero,
831 but the effective exponent is 1. The value of a8 is one less than
832 the actual exponent, so just negate it to get the shift amount. */
836 bgeui a8, 32, .Lmul_flush_to_zero
838 /* Shift a2 right. Any bits that are shifted out of a2 are saved
839 in a6 (combined with the shifted-out bits currently in a6) for
840 rounding the result. */
844 /* Set the exponent to zero. */
847 /* Pack any nonzero bits shifted out into a6. */
854 /* Return zero with the appropriate sign bit. */
861 /* For Xtensa processors with no multiply hardware, this simplified
862 version of _mulsi3 is used for multiplying 16-bit chunks of
863 the floating-point mantissas. When using CALL0, this function
864 uses a custom ABI: the inputs are passed in a13 and a14, the
865 result is returned in a12, and a8 and a15 are clobbered. */
/* mul_mulsi3_body: shift-and-add multiply.  Each 3-instruction group
   below conditionally adds (src2 << k) into dst when bit k of src1 is
   set; only bits 0..3 are visible in this excerpt -- the loop/unroll
   structure handling the remaining bits is not shown.  */
869 .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
871 1: add \tmp1, \src2, \dst
872 extui \tmp2, \src1, 0, 1
873 movnez \dst, \tmp1, \tmp2
875 do_addx2 \tmp1, \src2, \dst, \tmp1
876 extui \tmp2, \src1, 1, 1
877 movnez \dst, \tmp1, \tmp2
879 do_addx4 \tmp1, \src2, \dst, \tmp1
880 extui \tmp2, \src1, 2, 1
881 movnez \dst, \tmp1, \tmp2
883 do_addx8 \tmp1, \src2, \dst, \tmp1
884 extui \tmp2, \src1, 3, 1
885 movnez \dst, \tmp1, \tmp2
891 #if __XTENSA_CALL0_ABI__
892 mul_mulsi3_body a12, a13, a14, a15, a8
894 /* The result will be written into a2, so save that argument in a4. */
896 mul_mulsi3_body a2, a4, a3, a5, a6
899 #endif /* XCHAL_NO_MUL */
900 #endif /* L_mulsf3 */
/* __divsf3: IEEE-754 binary32 division (x/a2 divided by y/a3, result
   in a2).  Two builds exist: a hardware version using the FP divide
   option (WFR moves the integer operands into FP registers f1/f2) and
   a software version doing a 1-bit-per-iteration restoring division
   for the 23 mantissa bits.  NOTE(review): sparse excerpt -- the
   leading number on each line is the original file's line number;
   intermediate instructions are not shown.  */
906 #if XCHAL_HAVE_FP_DIV
910 .type __divsf3, @function
914 wfr f1, a2 /* dividend */
915 wfr f2, a3 /* divisor */
952 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
953 (This code is placed before the start of the function just to
954 keep it in range of the limited branch displacements.) */
957 /* Clear the sign bit of y. */
961 /* Check for division by zero. */
964 /* Normalize y. Adjust the exponent in a9. */
965 do_nsau a10, a3, a4, a5
974 /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
981 movi a4, 0x400000 /* make it a quiet NaN */
986 /* Clear the sign bit of x. */
990 /* If x is zero, return zero. */
991 beqz a2, .Ldiv_return_zero
993 /* Normalize x. Adjust the exponent in a8. */
994 do_nsau a10, a2, a4, a5
1003 /* Return zero with the appropriate sign bit. */
1009 /* Set the sign bit of the result. */
1013 /* If y is NaN or Inf, return NaN. */
1014 ball a3, a6, .Ldiv_return_nan
1016 bnez a7, .Ldiv_return_nan
1020 /* If y is Infinity, return zero. */
1022 beqz a8, .Ldiv_return_zero
1023 /* y is NaN; return it. */
1027 movi a4, 0x400000 /* make it a quiet NaN */
/* Software-division entry point (the non-FP-DIV build).  */
1033 .type __divsf3, @function
1038 /* Get the sign of the result. */
1041 /* Check for NaN and infinity. */
1042 ball a2, a6, .Ldiv_xnan_or_inf
1043 ball a3, a6, .Ldiv_ynan_or_inf
1045 /* Extract the exponents. */
1049 beqz a9, .Ldiv_yexpzero
1051 beqz a8, .Ldiv_xexpzero
1054 /* Subtract the exponents. */
1057 /* Replace sign/exponent fields with explicit "1.0". */
1064 /* The first digit of the mantissa division must be a one.
1065 Shift x (and adjust the exponent) as needed to make this true. */
1070 /* Do the first subtraction and shift. */
1074 /* Put the quotient into a10. */
1077 /* Divide one bit at a time for 23 bits. */
1079 #if XCHAL_HAVE_LOOPS
1080 loop a9, .Ldiv_loopend
1083 /* Shift the quotient << 1. */
1086 /* Is this digit a 0 or 1? */
1089 /* Output a 1 and subtract. */
1093 /* Shift the dividend << 1. */
1096 #if !XCHAL_HAVE_LOOPS
1102 /* Add the exponent bias (less one to account for the explicit "1.0"
1103 of the mantissa that will be added to the exponent in the final
1107 /* Check for over/underflow. The value in a8 is one less than the
1108 final exponent, so values in the range 0..fd are OK here. */
1110 bgeu a8, a4, .Ldiv_overflow
1113 /* Round. The remainder (<< 1) is in a2. */
1114 bltu a2, a3, .Ldiv_rounded
1116 beq a2, a3, .Ldiv_exactlyhalf
1119 /* Add the exponent to the mantissa. */
1124 /* Add the sign bit. */
1131 bltz a8, .Ldiv_underflow
1132 /* Return +/- Infinity. */
1133 addi a8, a4, 1 /* 0xff */
1138 /* Remainder is exactly half the divisor. Round even. */
1144 /* Create a subnormal value, where the exponent field contains zero,
1145 but the effective exponent is 1. The value of a8 is one less than
1146 the actual exponent, so just negate it to get the shift amount. */
1149 bgeui a8, 32, .Ldiv_flush_to_zero
1151 /* Shift a10 right. Any bits that are shifted out of a10 are
1152 saved in a6 for rounding the result. */
1156 /* Set the exponent to zero. */
1159 /* Pack any nonzero remainder (in a2) into a6. */
1164 /* Round a10 based on the bits shifted out into a6. */
1165 1: bgez a6, .Ldiv_rounded
1168 bnez a6, .Ldiv_rounded
1173 .Ldiv_flush_to_zero:
1174 /* Return zero with the appropriate sign bit. */
1179 #endif /* XCHAL_HAVE_FP_DIV */
1181 #endif /* L_divsf3 */
/* Soft-float comparison helpers: __eqsf2/__nesf2 (aliased via .set),
   __gtsf2, __lesf2, __gesf2, __ltsf2, __unordsf2.  Each follows the
   libgcc comparison convention (integer result whose sign/zeroness
   encodes the ordering; NaNs are detected first so they compare
   unordered).  NOTE(review): sparse excerpt -- the leading number on
   each line is the original file's line number; most instructions of
   each routine are not shown, so only the visible structure is
   documented.  */
1185 /* Equal and Not Equal */
1190 .set __nesf2, __eqsf2
1191 .type __eqsf2, @function
1196 /* The values are equal but NaN != NaN. Check the exponent. */
1208 /* Check if the mantissas are nonzero. */
1212 /* Check if x and y are zero with different signs. */
1216 /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1217 or x when exponent(x) = 0x7f8 and x == y. */
1228 .type __gtsf2, @function
/* NOTE(review): __gtsf2 shares .Lle_cmp with __lesf2 below; the two
   differ only in their NaN-case return value (not visible here).  */
1233 1: bnall a3, a6, .Lle_cmp
1235 /* Check if y is a NaN. */
1241 /* Check if x is a NaN. */
1248 /* Less Than or Equal */
1252 .type __lesf2, @function
1257 1: bnall a3, a6, .Lle_cmp
1259 /* Check if y is a NaN. */
1265 /* Check if x is a NaN. */
1272 /* Check if x and y have different signs. */
1274 bltz a7, .Lle_diff_signs
1276 /* Check if x is negative. */
1279 /* Check if x <= y. */
1285 /* Check if y <= x. */
1293 /* Check if both x and y are zero. */
1302 /* Greater Than or Equal */
1306 .type __gesf2, @function
1311 1: bnall a3, a6, .Llt_cmp
1313 /* Check if y is a NaN. */
1319 /* Check if x is a NaN. */
1330 .type __ltsf2, @function
1335 1: bnall a3, a6, .Llt_cmp
1337 /* Check if y is a NaN. */
1343 /* Check if x is a NaN. */
1350 /* Check if x and y have different signs. */
1352 bltz a7, .Llt_diff_signs
1354 /* Check if x is negative. */
1357 /* Check if x < y. */
1363 /* Check if y < x. */
1371 /* Check if both x and y are nonzero. */
1384 .type __unordsf2, @function
1403 #endif /* L_cmpsf2 */
/* __fixsfsi: convert binary32 (a2) to signed 32-bit int (a2),
   truncating toward zero.  Out-of-range values and Inf saturate to
   +/-maxint; NaN maps to +maxint (0x7fffffff).  NOTE(review): sparse
   excerpt -- the leading number on each line is the original file's
   line number; intermediate instructions are not shown.  */
1409 .type __fixsfsi, @function
1413 /* Check for NaN and Infinity. */
1415 ball a2, a6, .Lfixsfsi_nan_or_inf
1417 /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
1420 bgei a4, 32, .Lfixsfsi_maxint
1421 blti a4, 1, .Lfixsfsi_zero
1423 /* Add explicit "1.0" and shift << 8. */
1427 /* Shift back to the right, based on the exponent. */
1428 ssl a4 /* shift by 32 - a4 */
1431 /* Negate the result if sign != 0. */
1436 .Lfixsfsi_nan_or_inf:
1437 /* Handle Infinity and NaN. */
1439 beqz a4, .Lfixsfsi_maxint
1441 /* Translate NaN to +maxint. */
1445 slli a4, a6, 8 /* 0x80000000 */
1446 addi a5, a4, -1 /* 0x7fffffff */
1455 #endif /* L_fixsfsi */
/* __fixsfdi: convert binary32 (a2) to signed 64-bit int (xh/xl
   register pair), truncating toward zero; saturates like __fixsfsi
   but with a 64-bit range (exp - 0x7e < 64).  NOTE(review): sparse
   excerpt -- the leading number on each line is the original file's
   line number; xh/xl are presumably macros naming the 64-bit result
   register pair (definition not visible here).  */
1461 .type __fixsfdi, @function
1465 /* Check for NaN and Infinity. */
1467 ball a2, a6, .Lfixsfdi_nan_or_inf
1469 /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
1472 bgei a4, 64, .Lfixsfdi_maxint
1473 blti a4, 1, .Lfixsfdi_zero
1475 /* Add explicit "1.0" and shift << 8. */
1479 /* Shift back to the right, based on the exponent. */
1480 ssl a4 /* shift by 64 - a4 */
1481 bgei a4, 32, .Lfixsfdi_smallshift
1486 /* Negate the result if sign != 0. */
1494 .Lfixsfdi_smallshift:
1500 .Lfixsfdi_nan_or_inf:
1501 /* Handle Infinity and NaN. */
1503 beqz a4, .Lfixsfdi_maxint
1505 /* Translate NaN to +maxint. */
1509 slli a7, a6, 8 /* 0x80000000 */
1515 1: addi xh, a7, -1 /* 0x7fffffff */
1524 #endif /* L_fixsfdi */
/* __fixunssfsi: convert binary32 (a2) to unsigned 32-bit int (a2).
   Note the range test uses exp - 0x7f (not 0x7e as in the signed
   version) because the full 32-bit unsigned range is representable;
   exp - 0x7f == 32 is the special "no shift needed" case handled at
   .Lfixunssfsi_bigexp.  NOTE(review): sparse excerpt -- the leading
   number on each line is the original file's line number.  */
1529 .global __fixunssfsi
1530 .type __fixunssfsi, @function
1534 /* Check for NaN and Infinity. */
1536 ball a2, a6, .Lfixunssfsi_nan_or_inf
1538 /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
1541 bgei a4, 32, .Lfixunssfsi_maxint
1542 bltz a4, .Lfixunssfsi_zero
1544 /* Add explicit "1.0" and shift << 8. */
1548 /* Shift back to the right, based on the exponent. */
1550 beqi a4, 32, .Lfixunssfsi_bigexp
1551 ssl a4 /* shift by 32 - a4 */
1554 /* Negate the result if sign != 0. */
1559 .Lfixunssfsi_nan_or_inf:
1560 /* Handle Infinity and NaN. */
1562 beqz a4, .Lfixunssfsi_maxint
1564 /* Translate NaN to 0xffffffff. */
1568 .Lfixunssfsi_maxint:
1569 slli a4, a6, 8 /* 0x80000000 */
1570 movi a5, -1 /* 0xffffffff */
1579 .Lfixunssfsi_bigexp:
1580 /* Handle unsigned maximum exponent case. */
1582 mov a2, a5 /* no shift needed */
1585 /* Return 0x80000000 if negative. */
1589 #endif /* L_fixunssfsi */
/* __fixunssfdi: convert binary32 (a2) to unsigned 64-bit int (xh/xl
   register pair).  Structure mirrors __fixunssfsi with a 64-bit range
   (0 <= exp - 0x7f < 64) and a two-register shift.  NOTE(review):
   sparse excerpt -- the leading number on each line is the original
   file's line number; intermediate instructions are not shown.  */
1594 .global __fixunssfdi
1595 .type __fixunssfdi, @function
1599 /* Check for NaN and Infinity. */
1601 ball a2, a6, .Lfixunssfdi_nan_or_inf
1603 /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
1606 bgei a4, 64, .Lfixunssfdi_maxint
1607 bltz a4, .Lfixunssfdi_zero
1609 /* Add explicit "1.0" and shift << 8. */
1613 /* Shift back to the right, based on the exponent. */
1615 beqi a4, 64, .Lfixunssfdi_bigexp
1616 ssl a4 /* shift by 64 - a4 */
1617 bgei a4, 32, .Lfixunssfdi_smallshift
1621 .Lfixunssfdi_shifted:
1622 /* Negate the result if sign != 0. */
1630 .Lfixunssfdi_smallshift:
1634 j .Lfixunssfdi_shifted
1636 .Lfixunssfdi_nan_or_inf:
1637 /* Handle Infinity and NaN. */
1639 beqz a4, .Lfixunssfdi_maxint
1641 /* Translate NaN to 0xffffffff.... */
1646 .Lfixunssfdi_maxint:
1648 2: slli xh, a6, 8 /* 0x80000000 */
1657 .Lfixunssfdi_bigexp:
1658 /* Handle unsigned maximum exponent case. */
1661 leaf_return /* no shift needed */
1663 #endif /* L_fixunssfdi */
/* __floatsisf / __floatunsisf: convert a 32-bit integer (a2) to
   binary32 (a2).  The unsigned entry clears the sign and falls into
   the shared normalize/round path.  Exponent base 0x9d = 0x7e + 31 is
   correct for a value normalized with its leading 1 in the msb
   (verified: 126 + 31 = 157 = 0x9d).  Rounding is round-to-nearest,
   ties-to-even.  NOTE(review): sparse excerpt -- the leading number
   on each line is the original file's line number.  */
1668 .global __floatunsisf
1669 .type __floatunsisf, @function
1672 beqz a2, .Lfloatsisf_return
1674 /* Set the sign to zero and jump to the floatsisf code. */
1676 j .Lfloatsisf_normalize
1680 .type __floatsisf, @function
1684 /* Check for zero. */
1685 beqz a2, .Lfloatsisf_return
1687 /* Save the sign. */
1690 /* Get the absolute value. */
1698 .Lfloatsisf_normalize:
1699 /* Normalize with the first 1 bit in the msb. */
1700 do_nsau a4, a2, a5, a6
1704 /* Shift the mantissa into position, with rounding bits in a6. */
1706 slli a6, a5, (32 - 8)
1708 /* Set the exponent. */
1709 movi a5, 0x9d /* 0x7e + 31 */
1718 /* Round up if the leftover fraction is >= 1/2. */
1719 bgez a6, .Lfloatsisf_return
1720 addi a2, a2, 1 /* Overflow to the exponent is OK. */
1722 /* Check if the leftover fraction is exactly 1/2. */
1724 beqz a6, .Lfloatsisf_exactlyhalf
1729 .Lfloatsisf_exactlyhalf:
1730 /* Round down to the nearest even value. */
1735 #endif /* L_floatsisf */
/* __floatdisf / __floatundisf: convert a 64-bit integer (xh/xl
   register pair) to binary32 (a2).  Same shape as __floatsisf but the
   normalization handles xh == 0 separately (.Lfloatdisf_bigshift) and
   the exponent base is 0xbd = 0x7e + 63 (verified: 126 + 63 = 189 =
   0xbd).  NOTE(review): sparse excerpt -- the leading number on each
   line is the original file's line number; xh/xl are presumably
   macros for the 64-bit argument registers (definition not visible
   here).  */
1740 .global __floatundisf
1741 .type __floatundisf, @function
1745 /* Check for zero. */
1749 /* Set the sign to zero and jump to the floatdisf code. */
1751 j .Lfloatdisf_normalize
1755 .type __floatdisf, @function
1759 /* Check for zero. */
1763 /* Save the sign. */
1766 /* Get the absolute value. */
1767 bgez xh, .Lfloatdisf_normalize
1770 beqz xl, .Lfloatdisf_normalize
1773 .Lfloatdisf_normalize:
1774 /* Normalize with the first 1 bit in the msb of xh. */
1775 beqz xh, .Lfloatdisf_bigshift
1776 do_nsau a4, xh, a5, a6
1781 .Lfloatdisf_shifted:
1782 /* Shift the mantissa into position, with rounding bits in a6. */
1791 /* Set the exponent. */
1792 movi a5, 0xbd /* 0x7e + 63 */
1801 /* Round up if the leftover fraction is >= 1/2. */
1803 addi a2, a2, 1 /* Overflow to the exponent is OK. */
1805 /* Check if the leftover fraction is exactly 1/2. */
1807 beqz a6, .Lfloatdisf_exactlyhalf
1810 .Lfloatdisf_bigshift:
1811 /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
1812 do_nsau a4, xl, a5, a6
1817 j .Lfloatdisf_shifted
1819 .Lfloatdisf_exactlyhalf:
1820 /* Round down to the nearest even value. */
1825 #endif /* L_floatdisf */
/* Hardware-assisted entry points, each compiled only when the
   corresponding Xtensa FP option is configured: __ieee754_sqrtf
   (square root), __recipsf2 (reciprocal), __rsqrtsf2 (reciprocal
   square root).  NOTE(review): the bodies of all three functions fall
   in lines not shown in this excerpt; only the guards and symbol
   declarations are visible.  */
1827 #if XCHAL_HAVE_FP_SQRT
1832 .global __ieee754_sqrtf
1833 .type __ieee754_sqrtf, @function
1873 #endif /* L_sqrtf */
1874 #endif /* XCHAL_HAVE_FP_SQRT */
1876 #if XCHAL_HAVE_FP_RECIP
1882 .type __recipsf2, @function
1900 #endif /* L_recipsf2 */
1901 #endif /* XCHAL_HAVE_FP_RECIP */
1903 #if XCHAL_HAVE_FP_RSQRT
1905 /* Reciprocal square root */
1909 .type __rsqrtsf2, @function
1932 #endif /* L_rsqrtsf2 */
1933 #endif /* XCHAL_HAVE_FP_RSQRT */