/* IEEE-754 single-precision functions for Xtensa
   Copyright (C) 2006, 2007 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   In addition to the permissions in the GNU General Public License,
   the Free Software Foundation gives you unlimited permission to link
   the compiled version of this file into combinations with other
   programs, and to distribute those combinations without any
   restriction coming from the use of this file.  (The General Public
   License restrictions do apply in other respects; for example, they
   cover modification of the file, and distribution when not linked
   into a combined executable.)

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.  */
/* Warning!  The branch displacements for some Xtensa branch instructions
   are quite small, and this code has been carefully laid out to keep
   branch targets in range.  If you change anything, be sure to check that
   the assembler is not relaxing anything to branch over a jump.  */

        .type   __negsf2, @function
        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

        /* If y is neither Infinity nor NaN, return x.  */

        /* If x is a NaN, return it.  Otherwise, return y.  */
        beqz    a7, .Ladd_ynan_or_inf

        /* Operand signs differ.  Do a subtraction.  */
        .type   __addsf3, @function

        /* Check if the two operands have the same sign.  */
        bltz    a7, .Ladd_opposite_signs

        /* Check if either exponent == 0xff (i.e., NaN or Infinity).  */
        ball    a2, a6, .Ladd_xnan_or_inf
        ball    a3, a6, .Ladd_ynan_or_inf
        /* Compare the exponents.  The smaller operand will be shifted
           right by the exponent difference and added to the larger
           one.  */
        bltu    a7, a8, .Ladd_shiftx

        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Ladd_yexpzero
        /* Replace y's sign/exponent with the explicit "1.0" bit (0x00800000).  */

        /* Compute the exponent difference.  */

        /* Exponent difference >= 32 -- just return the bigger value.  */

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
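        /* Roughly what this alignment step computes, as a C sketch
           (the names here are illustrative, not part of this file):

               // y_frac: y's fraction with the explicit 1.0 bit set;
               // diff:   the exponent difference, known to be < 32.
               uint32_t aligned = y_frac >> diff;
               uint32_t rest = diff ? (y_frac << (32 - diff)) : 0;

           "rest" plays the role of a9 below: its msb is the rounding
           bit, and any other nonzero bit marks the result as inexact.  */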
        /* Do the addition.  */

        /* Check if the add overflowed into the exponent.  */
        beq     a10, a7, .Ladd_round
        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0", and increment the apparent exponent
           because subnormals behave as if they had the minimum (nonzero)
           exponent.  Test for the case when both exponents are zero.  */
        bnone   a2, a6, .Ladd_bothexpzero

        /* Both exponents are zero.  Handle this as a special case.  There
           is no need to shift or round, and the normal code for handling
           a carry into the exponent field will not work because it
           assumes there is an implicit "1.0" that needs to be added.  */

        /* Same as "yexpzero" except skip handling the case when both
           exponents are zero.  */
        /* Same thing as the "shifty" code, but with x and y swapped.  Also,
           because the exponent difference is always nonzero in this version,
           the shift sequence can use SLL and skip loading a constant zero.  */
        bnone   a2, a6, .Ladd_xexpzero

        bgeui   a10, 32, .Ladd_returny
        /* Check if the add overflowed into the exponent.  */
        bne     a10, a8, .Ladd_carry

        /* Round up if the leftover fraction is >= 1/2.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        beqz    a9, .Ladd_exactlyhalf
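        /* The complete round-to-nearest-even policy, as a C sketch
           (illustrative; "sum" is the assembled result and "rest"
           holds the shifted-out bits, with the round bit in its msb):

               if (rest & 0x80000000u) {       // leftover >= 1/2
                   sum += 1;                   // round up ...
                   if ((rest << 1) == 0)       // ... but on an exact tie
                       sum &= ~1u;             // round down to even
               }
        */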
        /* The addition has overflowed into the exponent field, so the
           value needs to be renormalized.  The mantissa of the result
           can be recovered by subtracting the original exponent and
           adding 0x800000 (which is the explicit "1.0" for the
           mantissa of the non-shifted operand -- the "1.0" for the
           shifted operand was already added).  The mantissa can then
           be shifted right by one bit.  The explicit "1.0" of the
           shifted mantissa then needs to be replaced by the exponent,
           incremented by one to account for the normalizing shift.
           It is faster to combine these operations: do the shift first
           and combine the additions and subtractions.  If x is the
           original exponent, the result is:
               shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
           which is equivalent to:
               shifted mantissa + ((x + 1) << 22)
           Note that the exponent is incremented here by leaving the
           explicit "1.0" of the mantissa in the exponent field.  */
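        /* A quick standalone C check of the algebra above (the identity
           holds because (x << 23) == 2 * (x << 22), even modulo 2^32):

               #include <assert.h>
               #include <stdint.h>
               int main(void) {
                   uint32_t m = 0x00345678;             // any shifted mantissa
                   for (uint32_t x = 0; x < 0x100; x++) // any original exponent
                       assert(m - (x << 22) + (1u << 22) + (x << 23)
                              == m + ((x + 1) << 22));
                   return 0;
               }
        */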
        /* Shift x right by one bit.  Save the lsb.  */

        /* See explanation above.  The original exponent is in a8.  */

        /* Return an Infinity if the exponent overflowed.  */
        ball    a2, a6, .Ladd_infinity

        /* Same thing as the "round" code except the msb of the leftover
           fraction is bit 0 of a10, with the rest of the fraction in a9.  */
        beqz    a9, .Ladd_exactlyhalf

        /* Clear the mantissa.  */

        /* The sign bit may have been lost in a carry-out.  Put it back.  */

        /* Round down to the nearest even value.  */
        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

        /* If y is neither Infinity nor NaN, return x.  */

        /* Both x and y are either NaN or Inf, so the result is NaN.  */
        movi    a4, 0x400000    /* make it a quiet NaN */

        /* Negate y and return it.  */
.Lsub_opposite_signs:
        /* Operand signs differ.  Do an addition.  */

        .type   __subsf3, @function

        /* Check if the two operands have the same sign.  */
        bltz    a7, .Lsub_opposite_signs
        /* Check if either exponent == 0xff (i.e., NaN or Infinity).  */
        ball    a2, a6, .Lsub_xnan_or_inf
        ball    a3, a6, .Lsub_ynan_or_inf
        /* Compare the operands.  In contrast to addition, the entire
           value matters here.  */
        bltu    a2, a3, .Lsub_xsmaller

        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Lsub_yexpzero

        /* Replace y's sign/exponent with the explicit "1.0" bit (0x00800000).  */
        /* Compute the exponent difference.  */

        /* Exponent difference >= 32 -- just return the bigger value.  */

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from a10.  */

        /* Check if the subtract underflowed into the exponent.  */
        beq     a10, a7, .Lsub_round
        /* Return zero if the inputs are equal.  (For the non-subnormal
           case, subtracting the "1.0" will cause a borrow from the exponent
           and this case can be detected when handling the borrow.)  */
        beq     a2, a3, .Lsub_return_zero

        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0".  Unless x is also a subnormal, increment
           y's apparent exponent because subnormals behave as if they had
           the minimum (nonzero) exponent.  */
        bnone   a2, a6, .Lsub_yexpdiff

        /* Negate and return y.  */
        /* Same thing as the "ysmaller" code, but with x and y swapped and
           with the sign of the result negated.  */
        bnone   a2, a6, .Lsub_xexpzero

        bgeui   a10, 32, .Lsub_returny
        /* Check if the subtract underflowed into the exponent.  */
        bne     a10, a8, .Lsub_borrow

        /* Round up if the leftover fraction is >= 1/2.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        beqz    a9, .Lsub_exactlyhalf

        /* Same as "yexpzero".  */
        beq     a2, a3, .Lsub_return_zero

        bnone   a3, a6, .Lsub_xexpdiff
        /* The subtraction has underflowed into the exponent field, so the
           value needs to be renormalized.  Shift the mantissa left as
           needed to remove any leading zeros and adjust the exponent
           accordingly.  If the exponent is not large enough to remove
           all the leading zeros, the result will be a subnormal value.  */

        do_nsau a6, a8, a7, a11
        bge     a6, a10, .Lsub_subnormal

.Lsub_normalize_shift:
        /* Shift the mantissa (a8/a9) left by a6.  */

        /* Combine the shifted mantissa with the sign and exponent,
           decrementing the exponent by a6.  (The exponent has already
           been decremented by one due to the borrow from the subtraction,
           but adding the mantissa will increment the exponent by one.)  */
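        /* In C terms, the renormalization amounts to the following
           sketch (clz() stands in for the NSAU instruction, e.g. GCC's
           __builtin_clz, which counts leading zero bits):

               int shift = clz(mant);   // leading zeros to remove
               mant <<= shift;
               exp  -= shift;           // keep the value unchanged

           with the extra twist, handled below, that the shift is capped
           when the exponent would reach zero, yielding a subnormal.  */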
        /* Round down to the nearest even value.  */

        /* If there was a borrow from the exponent, and the mantissa and
           guard digits are all zero, then the inputs were equal and the
           result should be zero.  */
        beqz    a9, .Lsub_return_zero

        /* Only the guard digit is nonzero.  Shift by min(24, a10).  */

        j       .Lsub_normalize_shift
        /* The exponent is too small to shift away all the leading zeros.
           Set a6 to the current exponent (which has already been
           decremented by the borrow) so that the exponent of the result
           will be zero.  Do not add 1 to a6 in this case, because: (1)
           adding the mantissa will not increment the exponent, so there is
           no need to subtract anything extra from the exponent to
           compensate, and (2) the effective exponent of a subnormal is 1,
           not 0, so the shift amount must be 1 smaller than normal.  */

        j       .Lsub_normalize_shift

#endif /* L_addsubsf3 */

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

        /* Clear the sign bit of x.  */

        /* If x is zero, return zero.  */
        beqz    a2, .Lmul_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        do_nsau a10, a2, a11, a12

        /* Clear the sign bit of y.  */

        /* If y is zero, return zero.  */
        beqz    a3, .Lmul_return_zero

        /* Normalize y.  Adjust the exponent in a9.  */
        do_nsau a10, a3, a11, a12
        /* Return zero with the appropriate sign bit.  */

        /* If y is zero, return NaN.  */
        movi    a4, 0x400000    /* make it a quiet NaN */

        /* If y is NaN, return y.  */
        bnall   a3, a6, .Lmul_returnx
        beqz    a8, .Lmul_returnx

        /* Set the sign bit and return.  */

        /* If x is zero, return NaN.  */
        bnez    a8, .Lmul_returny
        movi    a7, 0x400000    /* make it a quiet NaN */
        .type   __mulsf3, @function

#if __XTENSA_CALL0_ABI__
        /* This is not really a leaf function; allocate enough stack space
           to allow CALL12s to a helper function.  */
        /* Get the sign of the result.  */

        /* Check for NaN and infinity.  */
        ball    a2, a6, .Lmul_xnan_or_inf
        ball    a3, a6, .Lmul_ynan_or_inf

        /* Extract the exponents.  */
        beqz    a8, .Lmul_xexpzero
        beqz    a9, .Lmul_yexpzero

        /* Add the exponents.  */

        /* Replace sign/exponent fields with explicit "1.0".  */

        /* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */
#if XCHAL_HAVE_MUL32_HIGH

        /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
           products.  These partial products are:

                0 xl * yl
                1 xl * yh
                2 xh * yl
                3 xh * yh

           If using the Mul16 or Mul32 multiplier options, these input
           chunks must be stored in separate registers.  For Mac16, the
           UMUL.AA.* opcodes can specify that the inputs come from either
           half of the registers, so there is no need to shift them out
           ahead of time.  If there is no multiply hardware, the 16-bit
           chunks can be extracted when setting up the arguments to the
           separate multiply function.  */
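        /* The same decomposition as a standalone C sketch:

               #include <stdint.h>
               uint64_t mul32x32(uint32_t x, uint32_t y) {
                   uint32_t xl = x & 0xffff, xh = x >> 16;
                   uint32_t yl = y & 0xffff, yh = y >> 16;
                   uint64_t pp0 = (uint64_t)xl * yl;   // bits  0..31
                   uint64_t pp1 = (uint64_t)xl * yh;   // bits 16..47
                   uint64_t pp2 = (uint64_t)xh * yl;   // bits 16..47
                   uint64_t pp3 = (uint64_t)xh * yh;   // bits 32..63
                   return pp0 + ((pp1 + pp2) << 16) + (pp3 << 32);
               }
        */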
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Calling a separate multiply function will clobber a0 and requires
           use of a8 as a temporary, so save those values now.  (The function
           uses a custom ABI so nothing else needs to be saved.)  */

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

        /* Get the high halves of the inputs into registers.  */

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
        /* Clear the high halves of the inputs.  This does not matter
           for MUL16 because the high bits are ignored.  */

#endif /* MUL16 || MUL32 */
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mul16u  dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mull    dst, xreg ## xhalf, yreg ## yhalf
#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
#else /* no multiply hardware */

#define set_arg_l(dst, src) \
        extui   dst, src, 0, 16
#define set_arg_h(dst, src) \
        extui   dst, src, 16, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a13, xreg); \
        set_arg_ ## yhalf (a14, yreg); \
        call0   .Lmul_mulsi3; \
        mov     dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a14, xreg); \
        set_arg_ ## yhalf (a15, yreg); \
        call12  .Lmul_mulsi3; \
        mov     dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */
        /* Add pp1 and pp2 into a6 with carry-out in a9.  */
        do_mul(a6, a2, l, a3, h)        /* pp 1 */
        do_mul(a11, a2, h, a3, l)       /* pp 2 */

        /* Shift the high half of a9/a6 into position in a9.  Note that
           this value can be safely incremented without any carry-outs.  */

        /* Compute the low word into a6.  */
        do_mul(a11, a2, l, a3, l)       /* pp 0 */

        /* Compute the high word into a2.  */
        do_mul(a2, a2, h, a3, h)        /* pp 3 */
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Restore values saved on the stack during the multiplication.  */

#endif /* ! XCHAL_HAVE_MUL32_HIGH */
        /* Shift left by 9 bits, unless there was a carry-out from the
           multiply, in which case, shift by 8 bits and increment the
           exponent.  */

        /* Subtract the extra bias from the exponent sum (plus one to account
           for the explicit "1.0" of the mantissa that will be added to the
           exponent in the final result).  */

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..0xfd are OK here.  */
        bgeu    a8, a4, .Lmul_overflow
        bgez    a6, .Lmul_rounded
        beqz    a6, .Lmul_exactlyhalf

        /* Add the exponent to the mantissa.  */

        /* Add the sign bit.  */
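        /* Packing the result, as a C sketch (names are illustrative:
           "mant" has the explicit "1.0" at bit 23, "exp_m1" is one less
           than the final exponent, "sign" is 0 or 0x80000000):

               uint32_t result = (mant + (exp_m1 << 23)) | sign;

           Adding exp_m1 << 23 lets the explicit "1.0" bit carry into
           the exponent field, supplying the missing +1.  */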
#if __XTENSA_CALL0_ABI__

        /* Round down to the nearest even value.  */

        bltz    a8, .Lmul_underflow
        /* Return +/- Infinity.  */
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        bgeui   a8, 32, .Lmul_flush_to_zero

        /* Shift a2 right.  Any bits that are shifted out of a2 are saved
           in a6 (combined with the shifted-out bits currently in a6) for
           rounding the result.  */

        /* Set the exponent to zero.  */

        /* Pack any nonzero bits shifted out into a6.  */
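        /* In C terms (a sketch): with exp_m1 negative, shift = -exp_m1
           (guaranteed < 32 by the test above), and

               uint32_t kept = mant >> shift;
               rest |= mant << (32 - shift);   // fold shifted-out bits in

           after which the exponent field is simply left as zero and
           "rest" drives the usual rounding.  */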
        /* Return zero with the appropriate sign bit.  */

        /* For Xtensa processors with no multiply hardware, this simplified
           version of _mulsi3 is used for multiplying 16-bit chunks of
           the floating-point mantissas.  When using CALL0, this function
           uses a custom ABI: the inputs are passed in a13 and a14, the
           result is returned in a12, and a8 and a15 are clobbered.  */
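        /* Equivalent C for the shift-and-add multiply below (a sketch;
           the assembly unrolls four bits of src1 per loop iteration
           using ADDX2/ADDX4/ADDX8):

               uint32_t mulsi3(uint32_t a, uint32_t b) {
                   uint32_t r = 0;
                   while (a != 0) {
                       if (a & 1)
                           r += b;      // add b wherever a has a 1 bit
                       a >>= 1;
                       b <<= 1;
                   }
                   return r;
               }
        */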
        .macro  mul_mulsi3_body dst, src1, src2, tmp1, tmp2

1:      add     \tmp1, \src2, \dst
        extui   \tmp2, \src1, 0, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx2 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 1, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx4 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 2, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx8 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 3, 1
        movnez  \dst, \tmp1, \tmp2
#if __XTENSA_CALL0_ABI__
        mul_mulsi3_body a12, a13, a14, a15, a8
#else
        /* The result will be written into a2, so save that argument in a4.  */
        mul_mulsi3_body a2, a4, a3, a5, a6
#endif

#endif /* XCHAL_NO_MUL */
#endif /* L_mulsf3 */
        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

        /* Clear the sign bit of y.  */

        /* Check for division by zero.  */

        /* Normalize y.  Adjust the exponent in a9.  */
        do_nsau a10, a3, a4, a5

        /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
        movi    a4, 0x400000    /* make it a quiet NaN */
        /* Clear the sign bit of x.  */

        /* If x is zero, return zero.  */
        beqz    a2, .Ldiv_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        do_nsau a10, a2, a4, a5

        /* Return zero with the appropriate sign bit.  */
        /* Set the sign bit of the result.  */

        /* If y is NaN or Inf, return NaN.  */
        movi    a4, 0x400000    /* make it a quiet NaN */

        /* If y is Infinity, return zero.  */
        beqz    a8, .Ldiv_return_zero

        /* y is NaN; return it.  */

        .type   __divsf3, @function
        /* Get the sign of the result.  */

        /* Check for NaN and infinity.  */
        ball    a2, a6, .Ldiv_xnan_or_inf
        ball    a3, a6, .Ldiv_ynan_or_inf

        /* Extract the exponents.  */
        beqz    a9, .Ldiv_yexpzero
        beqz    a8, .Ldiv_xexpzero

        /* Subtract the exponents.  */

        /* Replace sign/exponent fields with explicit "1.0".  */
        /* The first digit of the mantissa division must be a one.
           Shift x (and adjust the exponent) as needed to make this true.  */

        /* Do the first subtraction and shift.  */

        /* Put the quotient into a10.  */

        /* Divide one bit at a time for 23 bits.  */

#if XCHAL_HAVE_LOOPS
        loop    a9, .Ldiv_loopend
#endif

        /* Shift the quotient << 1.  */

        /* Is this digit a 0 or 1?  */

        /* Output a 1 and subtract.  */

        /* Shift the dividend << 1.  */

#if !XCHAL_HAVE_LOOPS
        /* Add the exponent bias (less one to account for the explicit "1.0"
           of the mantissa that will be added to the exponent in the final
           result).  */

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..0xfd are OK here.  */
        bgeu    a8, a4, .Ldiv_overflow
        /* Round.  The remainder (<< 1) is in a2.  */
        bltu    a2, a3, .Ldiv_rounded
        beq     a2, a3, .Ldiv_exactlyhalf
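        /* The mantissa division and rounding in C (a sketch; x and y are
           the aligned fractions with x >= y, as arranged above):

               uint32_t q = 1;              // first digit is known to be 1
               uint32_t rem = x - y;
               for (int i = 0; i < 23; i++) {
                   rem <<= 1;
                   q <<= 1;
                   if (rem >= y) {          // next quotient digit is 1
                       rem -= y;
                       q |= 1;
                   }
               }
               rem <<= 1;                   // compare remainder * 2 ...
               if (rem >= y) {              // ... against the divisor
                   q += 1;                  // round up
                   if (rem == y)
                       q &= ~1u;            // tie: round to even
               }
        */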
        /* Add the exponent to the mantissa.  */

        /* Add the sign bit.  */

        bltz    a8, .Ldiv_underflow
        /* Return +/- Infinity.  */
        addi    a8, a4, 1       /* 0xff */
1083 /* Create a subnormal value, where the exponent field contains zero,
1084 but the effective exponent is 1. The value of a8 is one less than
1085 the actual exponent, so just negate it to get the shift amount. */
1088 bgeui a8, 32, .Ldiv_flush_to_zero
1090 /* Shift a10 right. Any bits that are shifted out of a10 are
1091 saved in a6 for rounding the result. */
1095 /* Set the exponent to zero. */
1098 /* Pack any nonzero remainder (in a2) into a6. */
1103 /* Round a10 based on the bits shifted out into a6. */
1104 1: bgez a6, .Ldiv_rounded
1107 bnez a6, .Ldiv_rounded
.Ldiv_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */

#endif /* L_divsf3 */
/* Equal and Not Equal */

        .set    __nesf2, __eqsf2
        .type   __eqsf2, @function

        /* The values are equal but NaN != NaN.  Check the exponent.  */

        /* Check if the mantissas are nonzero.  */

        /* Check if x and y are zero with different signs.  */

        /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
           of x when exponent(x) == 0xff and x == y.  */
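        /* What this computes, in C terms (a sketch; x and y are the raw
           IEEE bit patterns, and a zero result means "equal"):

               int eqsf2(uint32_t x, uint32_t y) {
                   if (x == y)                     // bitwise equal, but ...
                       return ((x & 0x7f800000) == 0x7f800000
                               && (x << 9) != 0);  // ... NaN != NaN
                   return ((x | y) << 1) != 0;     // allow +0 == -0
               }
        */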
        .type   __gtsf2, @function

1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN.  */

        /* Check if x is a NaN.  */
/* Less Than or Equal */

        .type   __lesf2, @function

1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN.  */

        /* Check if x is a NaN.  */

        /* Check if x and y have different signs.  */
        bltz    a7, .Lle_diff_signs

        /* Check if x is negative.  */

        /* Check if x <= y.  */

        /* Check if y <= x.  */

        /* Check if both x and y are zero.  */
/* Greater Than or Equal */

        .type   __gesf2, @function

1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN.  */

        /* Check if x is a NaN.  */

        .type   __ltsf2, @function

1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN.  */

        /* Check if x is a NaN.  */

        /* Check if x and y have different signs.  */
        bltz    a7, .Llt_diff_signs

        /* Check if x is negative.  */

        /* Check if x < y.  */

        /* Check if y < x.  */

        /* Check if both x and y are nonzero.  */
        .type   __unordsf2, @function

#endif /* L_cmpsf2 */
        .type   __fixsfsi, @function

        /* Check for NaN and Infinity.  */
        ball    a2, a6, .Lfixsfsi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
        bgei    a4, 32, .Lfixsfsi_maxint
        blti    a4, 1, .Lfixsfsi_zero

        /* Add explicit "1.0" and shift << 8.  */

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 32 - a4 */

        /* Negate the result if sign != 0.  */

.Lfixsfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        beqz    a4, .Lfixsfsi_maxint

        /* Translate NaN to +maxint.  */

        slli    a4, a6, 8       /* 0x80000000 */
        addi    a5, a4, -1      /* 0x7fffffff */
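        /* The whole conversion in C terms (a sketch; v is the raw IEEE
           bit pattern, and the asm additionally maps NaN to +maxint):

               #include <stdint.h>
               int32_t fixsfsi(uint32_t v) {
                   int exp = (int)((v >> 23) & 0xff) - 0x7e;
                   if (exp >= 32)                   // too big (or Inf)
                       return (v >> 31) ? INT32_MIN : INT32_MAX;
                   if (exp < 1)
                       return 0;                    // magnitude below 1
                   uint32_t m = ((v & 0x007fffff) | 0x00800000) << 8;
                   uint32_t r = m >> (32 - exp);    // truncate toward zero
                   return (v >> 31) ? -(int32_t)r : (int32_t)r;
               }
        */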
#endif /* L_fixsfsi */

        .type   __fixsfdi, @function

        /* Check for NaN and Infinity.  */
        ball    a2, a6, .Lfixsfdi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
        bgei    a4, 64, .Lfixsfdi_maxint
        blti    a4, 1, .Lfixsfdi_zero

        /* Add explicit "1.0" and shift << 8.  */

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixsfdi_smallshift

        /* Negate the result if sign != 0.  */

.Lfixsfdi_smallshift:

.Lfixsfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        beqz    a4, .Lfixsfdi_maxint

        /* Translate NaN to +maxint.  */

        slli    a7, a6, 8       /* 0x80000000 */

1:      addi    xh, a7, -1      /* 0x7fffffff */

#endif /* L_fixsfdi */
        .global __fixunssfsi
        .type   __fixunssfsi, @function

        /* Check for NaN and Infinity.  */
        ball    a2, a6, .Lfixunssfsi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
        bgei    a4, 32, .Lfixunssfsi_maxint
        bltz    a4, .Lfixunssfsi_zero

        /* Add explicit "1.0" and shift << 8.  */

        /* Shift back to the right, based on the exponent.  */
        beqi    a4, 32, .Lfixunssfsi_bigexp
        ssl     a4              /* shift by 32 - a4 */

        /* Negate the result if sign != 0.  */

.Lfixunssfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        beqz    a4, .Lfixunssfsi_maxint

        /* Translate NaN to 0xffffffff.  */

.Lfixunssfsi_maxint:
        slli    a4, a6, 8       /* 0x80000000 */
        movi    a5, -1          /* 0xffffffff */

.Lfixunssfsi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        mov     a2, a5          /* no shift needed */

        /* Return 0x80000000 if negative.  */

#endif /* L_fixunssfsi */
        .global __fixunssfdi
        .type   __fixunssfdi, @function

        /* Check for NaN and Infinity.  */
        ball    a2, a6, .Lfixunssfdi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
        bgei    a4, 64, .Lfixunssfdi_maxint
        bltz    a4, .Lfixunssfdi_zero

        /* Add explicit "1.0" and shift << 8.  */

        /* Shift back to the right, based on the exponent.  */
        beqi    a4, 64, .Lfixunssfdi_bigexp
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixunssfdi_smallshift

.Lfixunssfdi_shifted:
        /* Negate the result if sign != 0.  */

.Lfixunssfdi_smallshift:
        j       .Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        beqz    a4, .Lfixunssfdi_maxint

        /* Translate NaN to 0xffffffff....  */

.Lfixunssfdi_maxint:
2:      slli    xh, a6, 8       /* 0x80000000 */

.Lfixunssfdi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        leaf_return             /* no shift needed */

#endif /* L_fixunssfdi */
        .global __floatunsisf
        .type   __floatunsisf, @function

        beqz    a2, .Lfloatsisf_return

        /* Set the sign to zero and jump to the floatsisf code.  */
        j       .Lfloatsisf_normalize
        .type   __floatsisf, @function

        /* Check for zero.  */
        beqz    a2, .Lfloatsisf_return

        /* Save the sign.  */

        /* Get the absolute value.  */

.Lfloatsisf_normalize:
        /* Normalize with the first 1 bit in the msb.  */
        do_nsau a4, a2, a5, a6

        /* Shift the mantissa into position, with rounding bits in a6.  */
        slli    a6, a5, (32 - 8)

        /* Set the exponent.  */
        movi    a5, 0x9d        /* 0x7e + 31 */
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, .Lfloatsisf_return
        addi    a2, a2, 1       /* Overflow to the exponent is OK.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        beqz    a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_exactlyhalf:
        /* Round down to the nearest even value.  */
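        /* The conversion in C terms (a sketch; __builtin_clz stands in
           for the NSAU instruction):

               uint32_t floatsisf(int32_t i) {
                   if (i == 0) return 0;
                   uint32_t sign = (i < 0) ? 0x80000000u : 0;
                   uint32_t a = (i < 0) ? 0u - (uint32_t)i : (uint32_t)i;
                   int n = __builtin_clz(a);        // a != 0, so well defined
                   a <<= n;                         // leading 1 now in the msb
                   uint32_t r = sign + ((0x9du - n) << 23) + (a >> 8);
                   uint32_t rest = a << 24;         // leftover fraction
                   if (rest & 0x80000000u) {        // >= 1/2: round up
                       r += 1;                      // carry into exponent is OK
                       if ((rest << 1) == 0)
                           r &= ~1u;                // tie: round to even
                   }
                   return r;
               }
        */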
#endif /* L_floatsisf */

        .global __floatundisf
        .type   __floatundisf, @function

        /* Check for zero.  */

        /* Set the sign to zero and jump to the floatdisf code.  */
        j       .Lfloatdisf_normalize
        .type   __floatdisf, @function

        /* Check for zero.  */

        /* Save the sign.  */

        /* Get the absolute value.  */
        bgez    xh, .Lfloatdisf_normalize
        beqz    xl, .Lfloatdisf_normalize
.Lfloatdisf_normalize:
        /* Normalize with the first 1 bit in the msb of xh.  */
        beqz    xh, .Lfloatdisf_bigshift
        do_nsau a4, xh, a5, a6

.Lfloatdisf_shifted:
        /* Shift the mantissa into position, with rounding bits in a6.  */

        /* Set the exponent.  */
        movi    a5, 0xbd        /* 0x7e + 63 */

        /* Round up if the leftover fraction is >= 1/2.  */
        addi    a2, a2, 1       /* Overflow to the exponent is OK.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        beqz    a6, .Lfloatdisf_exactlyhalf
.Lfloatdisf_bigshift:
        /* xh is zero.  Normalize with the first 1 bit of xl in the msb of xh.  */
        do_nsau a4, xl, a5, a6

        j       .Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
        /* Round down to the nearest even value.  */

#endif /* L_floatdisf */