1 /* IEEE-754 single-precision functions for Xtensa
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
38 /* Warning! The branch displacements for some Xtensa branch instructions
39 are quite small, and this code has been carefully laid out to keep
40 branch targets in range. If you change anything, be sure to check that
41 the assembler is not relaxing anything to branch over a jump. */
47 .type __negsf2, @function
61 /* Handle NaNs and Infinities. (This code is placed before the
62 start of the function just to keep it in range of the limited
63 branch displacements.) */
66 /* If y is neither Infinity nor NaN, return x. */
68 /* If x is a NaN, return it. Otherwise, return y. */
70 beqz a7, .Ladd_ynan_or_inf
79 /* Operand signs differ. Do a subtraction. */
86 .type __addsf3, @function
91 /* Check if the two operands have the same sign. */
93 bltz a7, .Ladd_opposite_signs
96 /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
97 ball a2, a6, .Ladd_xnan_or_inf
98 ball a3, a6, .Ladd_ynan_or_inf
100 /* Compare the exponents. The smaller operand will be shifted
101 right by the exponent difference and added to the larger
105 bltu a7, a8, .Ladd_shiftx
108 /* Check if the smaller (or equal) exponent is zero. */
109 bnone a3, a6, .Ladd_yexpzero
111 /* Replace y sign/exponent with 0x008. */
117 /* Compute the exponent difference. */
120 /* Exponent difference > 32 -- just return the bigger value. */
123 /* Shift y right by the exponent difference. Any bits that are
124 shifted out of y are saved in a9 for rounding the result. */
130 /* Do the addition. */
133 /* Check if the add overflowed into the exponent. */
135 beq a10, a7, .Ladd_round
140 /* y is a subnormal value. Replace its sign/exponent with zero,
141 i.e., no implicit "1.0", and increment the apparent exponent
142 because subnormals behave as if they had the minimum (nonzero)
143 exponent. Test for the case when both exponents are zero. */
146 bnone a2, a6, .Ladd_bothexpzero
151 /* Both exponents are zero. Handle this as a special case. There
152 is no need to shift or round, and the normal code for handling
153 a carry into the exponent field will not work because it
154 assumes there is an implicit "1.0" that needs to be added. */
159 /* Same as "yexpzero" except skip handling the case when both
160 exponents are zero. */
167 /* Same thing as the "shifty" code, but with x and y swapped. Also,
168 because the exponent difference is always nonzero in this version,
169 the shift sequence can use SLL and skip loading a constant zero. */
170 bnone a2, a6, .Ladd_xexpzero
178 bgeui a10, 32, .Ladd_returny
186 /* Check if the add overflowed into the exponent. */
188 bne a10, a8, .Ladd_carry
191 /* Round up if the leftover fraction is >= 1/2. */
195 /* Check if the leftover fraction is exactly 1/2. */
197 beqz a9, .Ladd_exactlyhalf
205 /* The addition has overflowed into the exponent field, so the
206 value needs to be renormalized. The mantissa of the result
207 can be recovered by subtracting the original exponent and
208 adding 0x800000 (which is the explicit "1.0" for the
209 mantissa of the non-shifted operand -- the "1.0" for the
210 shifted operand was already added). The mantissa can then
211 be shifted right by one bit. The explicit "1.0" of the
212 shifted mantissa then needs to be replaced by the exponent,
213 incremented by one to account for the normalizing shift.
214 It is faster to combine these operations: do the shift first
215 and combine the additions and subtractions. If x is the
216 original exponent, the result is:
217 shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
219 shifted mantissa + ((x + 1) << 22)
220 Note that the exponent is incremented here by leaving the
221 explicit "1.0" of the mantissa in the exponent field. */
223 /* Shift x right by one bit. Save the lsb. */
227 /* See explanation above. The original exponent is in a8. */
232 /* Return an Infinity if the exponent overflowed. */
233 ball a2, a6, .Ladd_infinity
235 /* Same thing as the "round" code except the msb of the leftover
236 fraction is bit 0 of a10, with the rest of the fraction in a9. */
239 beqz a9, .Ladd_exactlyhalf
243 /* Clear the mantissa. */
247 /* The sign bit may have been lost in a carry-out. Put it back. */
253 /* Round down to the nearest even value. */
262 /* Handle NaNs and Infinities. (This code is placed before the
263 start of the function just to keep it in range of the limited
264 branch displacements.) */
267 /* If y is neither Infinity nor NaN, return x. */
269 /* Both x and y are either NaN or Inf, so the result is NaN. */
270 movi a4, 0x400000 /* make it a quiet NaN */
275 /* Negate y and return it. */
280 .Lsub_opposite_signs:
281 /* Operand signs differ. Do an addition. */
288 .type __subsf3, @function
293 /* Check if the two operands have the same sign. */
295 bltz a7, .Lsub_opposite_signs
298 /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
299 ball a2, a6, .Lsub_xnan_or_inf
300 ball a3, a6, .Lsub_ynan_or_inf
302 /* Compare the operands. In contrast to addition, the entire
303 value matters here. */
306 bltu a2, a3, .Lsub_xsmaller
309 /* Check if the smaller (or equal) exponent is zero. */
310 bnone a3, a6, .Lsub_yexpzero
312 /* Replace y sign/exponent with 0x008. */
318 /* Compute the exponent difference. */
321 /* Exponent difference > 32 -- just return the bigger value. */
324 /* Shift y right by the exponent difference. Any bits that are
325 shifted out of y are saved in a9 for rounding the result. */
333 /* Subtract the leftover bits in a9 from zero and propagate any
339 /* Check if the subtract underflowed into the exponent. */
341 beq a10, a7, .Lsub_round
345 /* Return zero if the inputs are equal. (For the non-subnormal
346 case, subtracting the "1.0" will cause a borrow from the exponent
347 and this case can be detected when handling the borrow.) */
348 beq a2, a3, .Lsub_return_zero
350 /* y is a subnormal value. Replace its sign/exponent with zero,
351 i.e., no implicit "1.0". Unless x is also a subnormal, increment
352 y's apparent exponent because subnormals behave as if they had
353 the minimum (nonzero) exponent. */
356 bnone a2, a6, .Lsub_yexpdiff
361 /* Negate and return y. */
367 /* Same thing as the "ysmaller" code, but with x and y swapped and
369 bnone a2, a6, .Lsub_xexpzero
377 bgeui a10, 32, .Lsub_returny
394 /* Check if the subtract underflowed into the exponent. */
396 bne a10, a8, .Lsub_borrow
399 /* Round up if the leftover fraction is >= 1/2. */
403 /* Check if the leftover fraction is exactly 1/2. */
405 beqz a9, .Lsub_exactlyhalf
409 /* Same as "yexpzero". */
410 beq a2, a3, .Lsub_return_zero
413 bnone a3, a6, .Lsub_xexpdiff
422 /* The subtraction has underflowed into the exponent field, so the
423 value needs to be renormalized. Shift the mantissa left as
424 needed to remove any leading zeros and adjust the exponent
425 accordingly. If the exponent is not large enough to remove
426 all the leading zeros, the result will be a subnormal value. */
430 do_nsau a6, a8, a7, a11
432 bge a6, a10, .Lsub_subnormal
435 .Lsub_normalize_shift:
436 /* Shift the mantissa (a8/a9) left by a6. */
441 /* Combine the shifted mantissa with the sign and exponent,
442 decrementing the exponent by a6. (The exponent has already
443 been decremented by one due to the borrow from the subtraction,
444 but adding the mantissa will increment the exponent by one.) */
452 /* Round down to the nearest even value. */
458 /* If there was a borrow from the exponent, and the mantissa and
459 guard digits are all zero, then the inputs were equal and the
460 result should be zero. */
461 beqz a9, .Lsub_return_zero
463 /* Only the guard digit is nonzero. Shift by min(24, a10). */
467 j .Lsub_normalize_shift
470 /* The exponent is too small to shift away all the leading zeros.
471 Set a6 to the current exponent (which has already been
472 decremented by the borrow) so that the exponent of the result
473 will be zero. Do not add 1 to a6 in this case, because: (1)
474 adding the mantissa will not increment the exponent, so there is
475 no need to subtract anything extra from the exponent to
476 compensate, and (2) the effective exponent of a subnormal is 1
477 not 0 so the shift amount must be 1 smaller than normal. */
479 j .Lsub_normalize_shift
481 #endif /* L_addsubsf3 */
486 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
487 #define XCHAL_NO_MUL 1
492 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
493 (This code is placed before the start of the function just to
494 keep it in range of the limited branch displacements.) */
497 /* Clear the sign bit of x. */
501 /* If x is zero, return zero. */
502 beqz a2, .Lmul_return_zero
504 /* Normalize x. Adjust the exponent in a8. */
505 do_nsau a10, a2, a11, a12
514 /* Clear the sign bit of y. */
518 /* If y is zero, return zero. */
519 beqz a3, .Lmul_return_zero
521 /* Normalize y. Adjust the exponent in a9. */
522 do_nsau a10, a3, a11, a12
531 /* Return zero with the appropriate sign bit. */
537 /* If y is zero, return NaN. */
540 movi a4, 0x400000 /* make it a quiet NaN */
544 /* If y is NaN, return y. */
545 bnall a3, a6, .Lmul_returnx
547 beqz a8, .Lmul_returnx
553 /* Set the sign bit and return. */
561 /* If x is zero, return NaN. */
563 bnez a8, .Lmul_returny
564 movi a7, 0x400000 /* make it a quiet NaN */
570 .type __mulsf3, @function
572 #if __XTENSA_CALL0_ABI__
580 /* This is not really a leaf function; allocate enough stack space
581 to allow CALL12s to a helper function. */
588 /* Get the sign of the result. */
591 /* Check for NaN and infinity. */
592 ball a2, a6, .Lmul_xnan_or_inf
593 ball a3, a6, .Lmul_ynan_or_inf
595 /* Extract the exponents. */
599 beqz a8, .Lmul_xexpzero
601 beqz a9, .Lmul_yexpzero
604 /* Add the exponents. */
607 /* Replace sign/exponent fields with explicit "1.0". */
614 /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
616 #if XCHAL_HAVE_MUL32_HIGH
623 /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
624 products. These partial products are:
633 If using the Mul16 or Mul32 multiplier options, these input
634 chunks must be stored in separate registers. For Mac16, the
635 UMUL.AA.* opcodes can specify that the inputs come from either
636 half of the registers, so there is no need to shift them out
637 ahead of time. If there is no multiply hardware, the 16-bit
638 chunks can be extracted when setting up the arguments to the
639 separate multiply function. */
641 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
642 /* Calling a separate multiply function will clobber a0 and requires
643 use of a8 as a temporary, so save those values now. (The function
644 uses a custom ABI so nothing else needs to be saved.) */
649 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
654 /* Get the high halves of the inputs into registers. */
661 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
662 /* Clear the high halves of the inputs. This does not matter
663 for MUL16 because the high bits are ignored. */
667 #endif /* MUL16 || MUL32 */
672 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
673 mul16u dst, xreg ## xhalf, yreg ## yhalf
675 #elif XCHAL_HAVE_MUL32
677 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
678 mull dst, xreg ## xhalf, yreg ## yhalf
680 #elif XCHAL_HAVE_MAC16
682 /* The preprocessor insists on inserting a space when concatenating after
683 a period in the definition of do_mul below. These macros are a workaround
684 using underscores instead of periods when doing the concatenation. */
685 #define umul_aa_ll umul.aa.ll
686 #define umul_aa_lh umul.aa.lh
687 #define umul_aa_hl umul.aa.hl
688 #define umul_aa_hh umul.aa.hh
690 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
691 umul_aa_ ## xhalf ## yhalf xreg, yreg; \
694 #else /* no multiply hardware */
696 #define set_arg_l(dst, src) \
697 extui dst, src, 0, 16
698 #define set_arg_h(dst, src) \
701 #if __XTENSA_CALL0_ABI__
702 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
703 set_arg_ ## xhalf (a13, xreg); \
704 set_arg_ ## yhalf (a14, yreg); \
705 call0 .Lmul_mulsi3; \
708 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
709 set_arg_ ## xhalf (a14, xreg); \
710 set_arg_ ## yhalf (a15, yreg); \
711 call12 .Lmul_mulsi3; \
713 #endif /* __XTENSA_CALL0_ABI__ */
715 #endif /* no multiply hardware */
717 /* Add pp1 and pp2 into a6 with carry-out in a9. */
718 do_mul(a6, a2, l, a3, h) /* pp 1 */
719 do_mul(a11, a2, h, a3, l) /* pp 2 */
725 /* Shift the high half of a9/a6 into position in a9. Note that
726 this value can be safely incremented without any carry-outs. */
730 /* Compute the low word into a6. */
731 do_mul(a11, a2, l, a3, l) /* pp 0 */
737 /* Compute the high word into a2. */
738 do_mul(a2, a2, h, a3, h) /* pp 3 */
741 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
742 /* Restore values saved on the stack during the multiplication. */
746 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
748 /* Shift left by 9 bits, unless there was a carry-out from the
749 multiply, in which case, shift by 8 bits and increment the
760 /* Subtract the extra bias from the exponent sum (plus one to account
761 for the explicit "1.0" of the mantissa that will be added to the
762 exponent in the final result). */
766 /* Check for over/underflow. The value in a8 is one less than the
767 final exponent, so values in the range 0..fd are OK here. */
769 bgeu a8, a4, .Lmul_overflow
773 bgez a6, .Lmul_rounded
776 beqz a6, .Lmul_exactlyhalf
779 /* Add the exponent to the mantissa. */
784 /* Add the sign bit. */
790 #if __XTENSA_CALL0_ABI__
800 /* Round down to the nearest even value. */
806 bltz a8, .Lmul_underflow
807 /* Return +/- Infinity. */
813 /* Create a subnormal value, where the exponent field contains zero,
814 but the effective exponent is 1. The value of a8 is one less than
815 the actual exponent, so just negate it to get the shift amount. */
819 bgeui a8, 32, .Lmul_flush_to_zero
821 /* Shift a2 right. Any bits that are shifted out of a2 are saved
822 in a6 (combined with the shifted-out bits currently in a6) for
823 rounding the result. */
827 /* Set the exponent to zero. */
830 /* Pack any nonzero bits shifted out into a6. */
837 /* Return zero with the appropriate sign bit. */
844 /* For Xtensa processors with no multiply hardware, this simplified
845 version of _mulsi3 is used for multiplying 16-bit chunks of
846 the floating-point mantissas. When using CALL0, this function
847 uses a custom ABI: the inputs are passed in a13 and a14, the
848 result is returned in a12, and a8 and a15 are clobbered. */
852 .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
854 1: add \tmp1, \src2, \dst
855 extui \tmp2, \src1, 0, 1
856 movnez \dst, \tmp1, \tmp2
858 do_addx2 \tmp1, \src2, \dst, \tmp1
859 extui \tmp2, \src1, 1, 1
860 movnez \dst, \tmp1, \tmp2
862 do_addx4 \tmp1, \src2, \dst, \tmp1
863 extui \tmp2, \src1, 2, 1
864 movnez \dst, \tmp1, \tmp2
866 do_addx8 \tmp1, \src2, \dst, \tmp1
867 extui \tmp2, \src1, 3, 1
868 movnez \dst, \tmp1, \tmp2
874 #if __XTENSA_CALL0_ABI__
875 mul_mulsi3_body a12, a13, a14, a15, a8
877 /* The result will be written into a2, so save that argument in a4. */
879 mul_mulsi3_body a2, a4, a3, a5, a6
882 #endif /* XCHAL_NO_MUL */
883 #endif /* L_mulsf3 */
890 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
891 (This code is placed before the start of the function just to
892 keep it in range of the limited branch displacements.) */
895 /* Clear the sign bit of y. */
899 /* Check for division by zero. */
902 /* Normalize y. Adjust the exponent in a9. */
903 do_nsau a10, a3, a4, a5
912 /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
919 movi a4, 0x400000 /* make it a quiet NaN */
924 /* Clear the sign bit of x. */
928 /* If x is zero, return zero. */
929 beqz a2, .Ldiv_return_zero
931 /* Normalize x. Adjust the exponent in a8. */
932 do_nsau a10, a2, a4, a5
941 /* Return zero with the appropriate sign bit. */
947 /* Set the sign bit of the result. */
951 /* If y is NaN or Inf, return NaN. */
953 movi a4, 0x400000 /* make it a quiet NaN */
958 /* If y is Infinity, return zero. */
960 beqz a8, .Ldiv_return_zero
961 /* y is NaN; return it. */
967 .type __divsf3, @function
972 /* Get the sign of the result. */
975 /* Check for NaN and infinity. */
976 ball a2, a6, .Ldiv_xnan_or_inf
977 ball a3, a6, .Ldiv_ynan_or_inf
979 /* Extract the exponents. */
983 beqz a9, .Ldiv_yexpzero
985 beqz a8, .Ldiv_xexpzero
988 /* Subtract the exponents. */
991 /* Replace sign/exponent fields with explicit "1.0". */
998 /* The first digit of the mantissa division must be a one.
999 Shift x (and adjust the exponent) as needed to make this true. */
1004 /* Do the first subtraction and shift. */
1008 /* Put the quotient into a10. */
1011 /* Divide one bit at a time for 23 bits. */
1013 #if XCHAL_HAVE_LOOPS
1014 loop a9, .Ldiv_loopend
1017 /* Shift the quotient << 1. */
1020 /* Is this digit a 0 or 1? */
1023 /* Output a 1 and subtract. */
1027 /* Shift the dividend << 1. */
1030 #if !XCHAL_HAVE_LOOPS
1036 /* Add the exponent bias (less one to account for the explicit "1.0"
1037 of the mantissa that will be added to the exponent in the final
1041 /* Check for over/underflow. The value in a8 is one less than the
1042 final exponent, so values in the range 0..fd are OK here. */
1044 bgeu a8, a4, .Ldiv_overflow
1047 /* Round. The remainder (<< 1) is in a2. */
1048 bltu a2, a3, .Ldiv_rounded
1050 beq a2, a3, .Ldiv_exactlyhalf
1053 /* Add the exponent to the mantissa. */
1058 /* Add the sign bit. */
1065 bltz a8, .Ldiv_underflow
1066 /* Return +/- Infinity. */
1067 addi a8, a4, 1 /* 0xff */
1072 /* Remainder is exactly half the divisor. Round even. */
1078 /* Create a subnormal value, where the exponent field contains zero,
1079 but the effective exponent is 1. The value of a8 is one less than
1080 the actual exponent, so just negate it to get the shift amount. */
1083 bgeui a8, 32, .Ldiv_flush_to_zero
1085 /* Shift a10 right. Any bits that are shifted out of a10 are
1086 saved in a6 for rounding the result. */
1090 /* Set the exponent to zero. */
1093 /* Pack any nonzero remainder (in a2) into a6. */
1098 /* Round a10 based on the bits shifted out into a6. */
1099 1: bgez a6, .Ldiv_rounded
1102 bnez a6, .Ldiv_rounded
1107 .Ldiv_flush_to_zero:
1108 /* Return zero with the appropriate sign bit. */
1113 #endif /* L_divsf3 */
1117 /* Equal and Not Equal */
1122 .set __nesf2, __eqsf2
1123 .type __eqsf2, @function
1128 /* The values are equal but NaN != NaN. Check the exponent. */
1140 /* Check if the mantissas are nonzero. */
1144 /* Check if x and y are zero with different signs. */
1148 /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1149 or x when exponent(x) = 0x7f8 and x == y. */
1160 .type __gtsf2, @function
1165 1: bnall a3, a6, .Lle_cmp
1167 /* Check if y is a NaN. */
1173 /* Check if x is a NaN. */
1180 /* Less Than or Equal */
1184 .type __lesf2, @function
1189 1: bnall a3, a6, .Lle_cmp
1191 /* Check if y is a NaN. */
1197 /* Check if x is a NaN. */
1204 /* Check if x and y have different signs. */
1206 bltz a7, .Lle_diff_signs
1208 /* Check if x is negative. */
1211 /* Check if x <= y. */
1217 /* Check if y <= x. */
1225 /* Check if both x and y are zero. */
1234 /* Greater Than or Equal */
1238 .type __gesf2, @function
1243 1: bnall a3, a6, .Llt_cmp
1245 /* Check if y is a NaN. */
1251 /* Check if x is a NaN. */
1262 .type __ltsf2, @function
1267 1: bnall a3, a6, .Llt_cmp
1269 /* Check if y is a NaN. */
1275 /* Check if x is a NaN. */
1282 /* Check if x and y have different signs. */
1284 bltz a7, .Llt_diff_signs
1286 /* Check if x is negative. */
1289 /* Check if x < y. */
1295 /* Check if y < x. */
1303 /* Check if both x and y are nonzero. */
1316 .type __unordsf2, @function
1335 #endif /* L_cmpsf2 */
1341 .type __fixsfsi, @function
1345 /* Check for NaN and Infinity. */
1347 ball a2, a6, .Lfixsfsi_nan_or_inf
1349 /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
1352 bgei a4, 32, .Lfixsfsi_maxint
1353 blti a4, 1, .Lfixsfsi_zero
1355 /* Add explicit "1.0" and shift << 8. */
1359 /* Shift back to the right, based on the exponent. */
1360 ssl a4 /* shift by 32 - a4 */
1363 /* Negate the result if sign != 0. */
1368 .Lfixsfsi_nan_or_inf:
1369 /* Handle Infinity and NaN. */
1371 beqz a4, .Lfixsfsi_maxint
1373 /* Translate NaN to +maxint. */
1377 slli a4, a6, 8 /* 0x80000000 */
1378 addi a5, a4, -1 /* 0x7fffffff */
1387 #endif /* L_fixsfsi */
1393 .type __fixsfdi, @function
1397 /* Check for NaN and Infinity. */
1399 ball a2, a6, .Lfixsfdi_nan_or_inf
1401 /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
1404 bgei a4, 64, .Lfixsfdi_maxint
1405 blti a4, 1, .Lfixsfdi_zero
1407 /* Add explicit "1.0" and shift << 8. */
1411 /* Shift back to the right, based on the exponent. */
1412 ssl a4 /* shift by 64 - a4 */
1413 bgei a4, 32, .Lfixsfdi_smallshift
1418 /* Negate the result if sign != 0. */
1426 .Lfixsfdi_smallshift:
1432 .Lfixsfdi_nan_or_inf:
1433 /* Handle Infinity and NaN. */
1435 beqz a4, .Lfixsfdi_maxint
1437 /* Translate NaN to +maxint. */
1441 slli a7, a6, 8 /* 0x80000000 */
1447 1: addi xh, a7, -1 /* 0x7fffffff */
1456 #endif /* L_fixsfdi */
1461 .global __fixunssfsi
1462 .type __fixunssfsi, @function
1466 /* Check for NaN and Infinity. */
1468 ball a2, a6, .Lfixunssfsi_nan_or_inf
1470 /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
1473 bgei a4, 32, .Lfixunssfsi_maxint
1474 bltz a4, .Lfixunssfsi_zero
1476 /* Add explicit "1.0" and shift << 8. */
1480 /* Shift back to the right, based on the exponent. */
1482 beqi a4, 32, .Lfixunssfsi_bigexp
1483 ssl a4 /* shift by 32 - a4 */
1486 /* Negate the result if sign != 0. */
1491 .Lfixunssfsi_nan_or_inf:
1492 /* Handle Infinity and NaN. */
1494 beqz a4, .Lfixunssfsi_maxint
1496 /* Translate NaN to 0xffffffff. */
1500 .Lfixunssfsi_maxint:
1501 slli a4, a6, 8 /* 0x80000000 */
1502 movi a5, -1 /* 0xffffffff */
1511 .Lfixunssfsi_bigexp:
1512 /* Handle unsigned maximum exponent case. */
1514 mov a2, a5 /* no shift needed */
1517 /* Return 0x80000000 if negative. */
1521 #endif /* L_fixunssfsi */
1526 .global __fixunssfdi
1527 .type __fixunssfdi, @function
1531 /* Check for NaN and Infinity. */
1533 ball a2, a6, .Lfixunssfdi_nan_or_inf
1535 /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
1538 bgei a4, 64, .Lfixunssfdi_maxint
1539 bltz a4, .Lfixunssfdi_zero
1541 /* Add explicit "1.0" and shift << 8. */
1545 /* Shift back to the right, based on the exponent. */
1547 beqi a4, 64, .Lfixunssfdi_bigexp
1548 ssl a4 /* shift by 64 - a4 */
1549 bgei a4, 32, .Lfixunssfdi_smallshift
1553 .Lfixunssfdi_shifted:
1554 /* Negate the result if sign != 0. */
1562 .Lfixunssfdi_smallshift:
1566 j .Lfixunssfdi_shifted
1568 .Lfixunssfdi_nan_or_inf:
1569 /* Handle Infinity and NaN. */
1571 beqz a4, .Lfixunssfdi_maxint
1573 /* Translate NaN to 0xffffffff.... */
1578 .Lfixunssfdi_maxint:
1580 2: slli xh, a6, 8 /* 0x80000000 */
1589 .Lfixunssfdi_bigexp:
1590 /* Handle unsigned maximum exponent case. */
1593 leaf_return /* no shift needed */
1595 #endif /* L_fixunssfdi */
1600 .global __floatunsisf
1601 .type __floatunsisf, @function
1604 beqz a2, .Lfloatsisf_return
1606 /* Set the sign to zero and jump to the floatsisf code. */
1608 j .Lfloatsisf_normalize
1612 .type __floatsisf, @function
1616 /* Check for zero. */
1617 beqz a2, .Lfloatsisf_return
1619 /* Save the sign. */
1622 /* Get the absolute value. */
1630 .Lfloatsisf_normalize:
1631 /* Normalize with the first 1 bit in the msb. */
1632 do_nsau a4, a2, a5, a6
1636 /* Shift the mantissa into position, with rounding bits in a6. */
1638 slli a6, a5, (32 - 8)
1640 /* Set the exponent. */
1641 movi a5, 0x9d /* 0x7e + 31 */
1650 /* Round up if the leftover fraction is >= 1/2. */
1651 bgez a6, .Lfloatsisf_return
1652 addi a2, a2, 1 /* Overflow to the exponent is OK. */
1654 /* Check if the leftover fraction is exactly 1/2. */
1656 beqz a6, .Lfloatsisf_exactlyhalf
1661 .Lfloatsisf_exactlyhalf:
1662 /* Round down to the nearest even value. */
1667 #endif /* L_floatsisf */
1672 .global __floatundisf
1673 .type __floatundisf, @function
1677 /* Check for zero. */
1681 /* Set the sign to zero and jump to the floatdisf code. */
1683 j .Lfloatdisf_normalize
1687 .type __floatdisf, @function
1691 /* Check for zero. */
1695 /* Save the sign. */
1698 /* Get the absolute value. */
1699 bgez xh, .Lfloatdisf_normalize
1702 beqz xl, .Lfloatdisf_normalize
1705 .Lfloatdisf_normalize:
1706 /* Normalize with the first 1 bit in the msb of xh. */
1707 beqz xh, .Lfloatdisf_bigshift
1708 do_nsau a4, xh, a5, a6
1713 .Lfloatdisf_shifted:
1714 /* Shift the mantissa into position, with rounding bits in a6. */
1723 /* Set the exponent. */
1724 movi a5, 0xbd /* 0x7e + 63 */
1733 /* Round up if the leftover fraction is >= 1/2. */
1735 addi a2, a2, 1 /* Overflow to the exponent is OK. */
1737 /* Check if the leftover fraction is exactly 1/2. */
1739 beqz a6, .Lfloatdisf_exactlyhalf
1742 .Lfloatdisf_bigshift:
1743 /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
1744 do_nsau a4, xl, a5, a6
1749 j .Lfloatdisf_shifted
1751 .Lfloatdisf_exactlyhalf:
1752 /* Round down to the nearest even value. */
1757 #endif /* L_floatdisf */