1 /* IEEE-754 single-precision functions for Xtensa
2 Copyright (C) 2006 Free Software Foundation, Inc.
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 In addition to the permissions in the GNU General Public License,
13 the Free Software Foundation gives you unlimited permission to link
14 the compiled version of this file into combinations with other
15 programs, and to distribute those combinations without any
16 restriction coming from the use of this file. (The General Public
17 License restrictions do apply in other respects; for example, they
18 cover modification of the file, and distribution when not linked
19 into a combine executable.)
21 GCC is distributed in the hope that it will be useful, but WITHOUT
22 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
23 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
24 License for more details.
26 You should have received a copy of the GNU General Public License
27 along with GCC; see the file COPYING. If not, write to the Free
28 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
43 /* Warning! The branch displacements for some Xtensa branch instructions
44 are quite small, and this code has been carefully laid out to keep
45 branch targets in range. If you change anything, be sure to check that
46 the assembler is not relaxing anything to branch over a jump. */
/* NOTE(review): __negsf2 / __addsf3 / __subsf3 -- IEEE-754 single-precision
   negate, add and subtract.  x appears to be in a2 and y in a3 (see the
   NaN/Inf checks on a2/a3 below); a6 presumably holds the exponent mask
   and a7/a8 the extracted exponents -- confirm against the setup code not
   visible in this view.  Rounding is to nearest, ties to even (see the
   "exactly 1/2" and "round down to the nearest even" paths).  Code is left
   byte-identical on purpose: the file header warns that branch
   displacements here are laid out carefully.  */
52 .type __negsf2, @function
66 /* Handle NaNs and Infinities. (This code is placed before the
67 start of the function just to keep it in range of the limited
68 branch displacements.) */
71 /* If y is neither Infinity nor NaN, return x. */
73 /* If x is a NaN, return it. Otherwise, return y. */
75 beqz a7, .Ladd_ynan_or_inf
84 /* Operand signs differ. Do a subtraction. */
91 .type __addsf3, @function
96 /* Check if the two operands have the same sign. */
98 bltz a7, .Ladd_opposite_signs
101 /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
102 ball a2, a6, .Ladd_xnan_or_inf
103 ball a3, a6, .Ladd_ynan_or_inf
105 /* Compare the exponents. The smaller operand will be shifted
106 right by the exponent difference and added to the larger
110 bltu a7, a8, .Ladd_shiftx
113 /* Check if the smaller (or equal) exponent is zero. */
114 bnone a3, a6, .Ladd_yexpzero
116 /* Replace y sign/exponent with 0x008. */
122 /* Compute the exponent difference. */
125 /* Exponent difference > 32 -- just return the bigger value. */
128 /* Shift y right by the exponent difference. Any bits that are
129 shifted out of y are saved in a9 for rounding the result. */
135 /* Do the addition. */
138 /* Check if the add overflowed into the exponent. */
140 beq a10, a7, .Ladd_round
145 /* y is a subnormal value. Replace its sign/exponent with zero,
146 i.e., no implicit "1.0", and increment the apparent exponent
147 because subnormals behave as if they had the minimum (nonzero)
148 exponent. Test for the case when both exponents are zero. */
151 bnone a2, a6, .Ladd_bothexpzero
156 /* Both exponents are zero. Handle this as a special case. There
157 is no need to shift or round, and the normal code for handling
158 a carry into the exponent field will not work because it
159 assumes there is an implicit "1.0" that needs to be added. */
164 /* Same as "yexpzero" except skip handling the case when both
165 exponents are zero. */
172 /* Same thing as the "shifty" code, but with x and y swapped. Also,
173 because the exponent difference is always nonzero in this version,
174 the shift sequence can use SLL and skip loading a constant zero. */
175 bnone a2, a6, .Ladd_xexpzero
183 bgeui a10, 32, .Ladd_returny
191 /* Check if the add overflowed into the exponent. */
193 bne a10, a8, .Ladd_carry
196 /* Round up if the leftover fraction is >= 1/2. */
200 /* Check if the leftover fraction is exactly 1/2. */
202 beqz a9, .Ladd_exactlyhalf
210 /* The addition has overflowed into the exponent field, so the
211 value needs to be renormalized. The mantissa of the result
212 can be recovered by subtracting the original exponent and
213 adding 0x800000 (which is the explicit "1.0" for the
214 mantissa of the non-shifted operand -- the "1.0" for the
215 shifted operand was already added). The mantissa can then
216 be shifted right by one bit. The explicit "1.0" of the
217 shifted mantissa then needs to be replaced by the exponent,
218 incremented by one to account for the normalizing shift.
219 It is faster to combine these operations: do the shift first
220 and combine the additions and subtractions. If x is the
221 original exponent, the result is:
222 shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
224 shifted mantissa + ((x + 1) << 22)
225 Note that the exponent is incremented here by leaving the
226 explicit "1.0" of the mantissa in the exponent field. */
228 /* Shift x right by one bit. Save the lsb. */
232 /* See explanation above. The original exponent is in a8. */
237 /* Return an Infinity if the exponent overflowed. */
238 ball a2, a6, .Ladd_infinity
240 /* Same thing as the "round" code except the msb of the leftover
241 fraction is bit 0 of a10, with the rest of the fraction in a9. */
244 beqz a9, .Ladd_exactlyhalf
248 /* Clear the mantissa. */
252 /* The sign bit may have been lost in a carry-out. Put it back. */
258 /* Round down to the nearest even value. */
267 /* Handle NaNs and Infinities. (This code is placed before the
268 start of the function just to keep it in range of the limited
269 branch displacements.) */
272 /* If y is neither Infinity nor NaN, return x. */
274 /* Both x and y are either NaN or Inf, so the result is NaN. */
275 movi a4, 0x400000 /* make it a quiet NaN */
280 /* Negate y and return it. */
285 .Lsub_opposite_signs:
286 /* Operand signs differ. Do an addition. */
293 .type __subsf3, @function
298 /* Check if the two operands have the same sign. */
300 bltz a7, .Lsub_opposite_signs
303 /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
304 ball a2, a6, .Lsub_xnan_or_inf
305 ball a3, a6, .Lsub_ynan_or_inf
307 /* Compare the operands. In contrast to addition, the entire
308 value matters here. */
311 bltu a2, a3, .Lsub_xsmaller
314 /* Check if the smaller (or equal) exponent is zero. */
315 bnone a3, a6, .Lsub_yexpzero
317 /* Replace y sign/exponent with 0x008. */
323 /* Compute the exponent difference. */
326 /* Exponent difference > 32 -- just return the bigger value. */
329 /* Shift y right by the exponent difference. Any bits that are
330 shifted out of y are saved in a9 for rounding the result. */
338 /* Subtract the leftover bits in a9 from zero and propagate any
344 /* Check if the subtract underflowed into the exponent. */
346 beq a10, a7, .Lsub_round
350 /* Return zero if the inputs are equal. (For the non-subnormal
351 case, subtracting the "1.0" will cause a borrow from the exponent
352 and this case can be detected when handling the borrow.) */
353 beq a2, a3, .Lsub_return_zero
355 /* y is a subnormal value. Replace its sign/exponent with zero,
356 i.e., no implicit "1.0". Unless x is also a subnormal, increment
357 y's apparent exponent because subnormals behave as if they had
358 the minimum (nonzero) exponent. */
361 bnone a2, a6, .Lsub_yexpdiff
366 /* Negate and return y. */
372 /* Same thing as the "ysmaller" code, but with x and y swapped and
374 bnone a2, a6, .Lsub_xexpzero
382 bgeui a10, 32, .Lsub_returny
399 /* Check if the subtract underflowed into the exponent. */
401 bne a10, a8, .Lsub_borrow
404 /* Round up if the leftover fraction is >= 1/2. */
408 /* Check if the leftover fraction is exactly 1/2. */
410 beqz a9, .Lsub_exactlyhalf
414 /* Same as "yexpzero". */
415 beq a2, a3, .Lsub_return_zero
418 bnone a3, a6, .Lsub_xexpdiff
427 /* The subtraction has underflowed into the exponent field, so the
428 value needs to be renormalized. Shift the mantissa left as
429 needed to remove any leading zeros and adjust the exponent
430 accordingly. If the exponent is not large enough to remove
431 all the leading zeros, the result will be a subnormal value. */
435 do_nsau a6, a8, a7, a11
437 bge a6, a10, .Lsub_subnormal
440 .Lsub_normalize_shift:
441 /* Shift the mantissa (a8/a9) left by a6. */
446 /* Combine the shifted mantissa with the sign and exponent,
447 decrementing the exponent by a6. (The exponent has already
448 been decremented by one due to the borrow from the subtraction,
449 but adding the mantissa will increment the exponent by one.) */
457 /* Round down to the nearest even value. */
463 /* If there was a borrow from the exponent, and the mantissa and
464 guard digits are all zero, then the inputs were equal and the
465 result should be zero. */
466 beqz a9, .Lsub_return_zero
468 /* Only the guard digit is nonzero. Shift by min(24, a10). */
472 j .Lsub_normalize_shift
475 /* The exponent is too small to shift away all the leading zeros.
476 Set a6 to the current exponent (which has already been
477 decremented by the borrow) so that the exponent of the result
478 will be zero. Do not add 1 to a6 in this case, because: (1)
479 adding the mantissa will not increment the exponent, so there is
480 no need to subtract anything extra from the exponent to
481 compensate, and (2) the effective exponent of a subnormal is 1
482 not 0 so the shift amount must be 1 smaller than normal. */
484 j .Lsub_normalize_shift
486 #endif /* L_addsubsf3 */
/* NOTE(review): __mulsf3 -- IEEE-754 single-precision multiply.  The
   32x32->64 mantissa multiply is selected at preprocessor time:
   MUL32_HIGH hardware, MUL16/MUL32 on 16-bit halves, MAC16 UMUL.AA.*
   opcodes, or the software fallback .Lmul_mulsi3 below, which uses a
   custom ABI (inputs a13/a14, result a12, clobbers a8/a15).  Code left
   byte-identical; the file header warns against disturbing the carefully
   laid out branch displacements.  */
493 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
494 (This code is placed before the start of the function just to
495 keep it in range of the limited branch displacements.) */
498 /* Clear the sign bit of x. */
502 /* If x is zero, return zero. */
503 beqz a2, .Lmul_return_zero
505 /* Normalize x. Adjust the exponent in a8. */
506 do_nsau a10, a2, a11, a12
515 /* Clear the sign bit of y. */
519 /* If y is zero, return zero. */
520 beqz a3, .Lmul_return_zero
522 /* Normalize y. Adjust the exponent in a9. */
523 do_nsau a10, a3, a11, a12
532 /* Return zero with the appropriate sign bit. */
538 /* If y is zero, return NaN. */
541 movi a4, 0x400000 /* make it a quiet NaN */
545 /* If y is NaN, return y. */
546 bnall a3, a6, .Lmul_returnx
548 beqz a8, .Lmul_returnx
554 /* Set the sign bit and return. */
562 /* If x is zero, return NaN. */
564 bnez a8, .Lmul_returny
565 movi a7, 0x400000 /* make it a quiet NaN */
571 .type __mulsf3, @function
574 #if __XTENSA_CALL0_ABI__
583 /* Get the sign of the result. */
586 /* Check for NaN and infinity. */
587 ball a2, a6, .Lmul_xnan_or_inf
588 ball a3, a6, .Lmul_ynan_or_inf
590 /* Extract the exponents. */
594 beqz a8, .Lmul_xexpzero
596 beqz a9, .Lmul_yexpzero
599 /* Add the exponents. */
602 /* Replace sign/exponent fields with explicit "1.0". */
609 /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
611 #if XCHAL_HAVE_MUL32_HIGH
618 /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
619 products. These partial products are:
628 If using the Mul16 or Mul32 multiplier options, these input
629 chunks must be stored in separate registers. For Mac16, the
630 UMUL.AA.* opcodes can specify that the inputs come from either
631 half of the registers, so there is no need to shift them out
632 ahead of time. If there is no multiply hardware, the 16-bit
633 chunks can be extracted when setting up the arguments to the
634 separate multiply function. */
636 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
637 /* Calling a separate multiply function will clobber a0 and requires
638 use of a8 as a temporary, so save those values now. (The function
639 uses a custom ABI so nothing else needs to be saved.) */
644 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
649 /* Get the high halves of the inputs into registers. */
656 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
657 /* Clear the high halves of the inputs. This does not matter
658 for MUL16 because the high bits are ignored. */
662 #endif /* MUL16 || MUL32 */
667 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
668 mul16u dst, xreg ## xhalf, yreg ## yhalf
670 #elif XCHAL_HAVE_MUL32
672 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
673 mull dst, xreg ## xhalf, yreg ## yhalf
675 #elif XCHAL_HAVE_MAC16
677 /* The preprocessor insists on inserting a space when concatenating after
678 a period in the definition of do_mul below. These macros are a workaround
679 using underscores instead of periods when doing the concatenation. */
680 #define umul_aa_ll umul.aa.ll
681 #define umul_aa_lh umul.aa.lh
682 #define umul_aa_hl umul.aa.hl
683 #define umul_aa_hh umul.aa.hh
685 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
686 umul_aa_ ## xhalf ## yhalf xreg, yreg; \
689 #else /* no multiply hardware */
691 #define set_arg_l(dst, src) \
692 extui dst, src, 0, 16
693 #define set_arg_h(dst, src) \
696 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
697 set_arg_ ## xhalf (a13, xreg); \
698 set_arg_ ## yhalf (a14, yreg); \
699 call0 .Lmul_mulsi3; \
703 /* Add pp1 and pp2 into a6 with carry-out in a9. */
704 do_mul(a6, a2, l, a3, h) /* pp 1 */
705 do_mul(a11, a2, h, a3, l) /* pp 2 */
711 /* Shift the high half of a9/a6 into position in a9. Note that
712 this value can be safely incremented without any carry-outs. */
716 /* Compute the low word into a6. */
717 do_mul(a11, a2, l, a3, l) /* pp 0 */
723 /* Compute the high word into a2. */
724 do_mul(a2, a2, h, a3, h) /* pp 3 */
727 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
728 /* Restore values saved on the stack during the multiplication. */
734 /* Shift left by 9 bits, unless there was a carry-out from the
735 multiply, in which case, shift by 8 bits and increment the
746 /* Subtract the extra bias from the exponent sum (plus one to account
747 for the explicit "1.0" of the mantissa that will be added to the
748 exponent in the final result). */
752 /* Check for over/underflow. The value in a8 is one less than the
753 final exponent, so values in the range 0..fd are OK here. */
755 bgeu a8, a4, .Lmul_overflow
759 bgez a6, .Lmul_rounded
762 beqz a6, .Lmul_exactlyhalf
765 /* Add the exponent to the mantissa. */
770 /* Add the sign bit. */
776 #if __XTENSA_CALL0_ABI__
786 /* Round down to the nearest even value. */
792 bltz a8, .Lmul_underflow
793 /* Return +/- Infinity. */
799 /* Create a subnormal value, where the exponent field contains zero,
800 but the effective exponent is 1. The value of a8 is one less than
801 the actual exponent, so just negate it to get the shift amount. */
805 bgeui a8, 32, .Lmul_flush_to_zero
807 /* Shift a2 right. Any bits that are shifted out of a2 are saved
808 in a6 (combined with the shifted-out bits currently in a6) for
809 rounding the result. */
813 /* Set the exponent to zero. */
816 /* Pack any nonzero bits shifted out into a6. */
823 /* Return zero with the appropriate sign bit. */
828 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
830 /* For Xtensa processors with no multiply hardware, this simplified
831 version of _mulsi3 is used for multiplying 16-bit chunks of
832 the floating-point mantissas. It uses a custom ABI: the inputs
833 are passed in a13 and a14, the result is returned in a12, and
834 a8 and a15 are clobbered. */
843 do_addx2 a15, a14, a12, a15
847 do_addx4 a15, a14, a12, a15
851 do_addx8 a15, a14, a12, a15
857 bnez a13, .Lmul_mult_loop
859 #endif /* !MUL16 && !MUL32 && !MAC16 */
860 #endif /* L_mulsf3 */
/* NOTE(review): __divsf3 -- IEEE-754 single-precision divide.  Uses a
   one-bit-per-iteration restoring division over 23 mantissa bits
   (zero-overhead `loop` when XCHAL_HAVE_LOOPS, explicit branch
   otherwise); the doubled remainder in a2 drives round-to-nearest-even.
   Code left byte-identical because of the branch-displacement layout
   warning in the file header.  */
867 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
868 (This code is placed before the start of the function just to
869 keep it in range of the limited branch displacements.) */
872 /* Clear the sign bit of y. */
876 /* Check for division by zero. */
879 /* Normalize y. Adjust the exponent in a9. */
880 do_nsau a10, a3, a4, a5
889 /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
896 movi a4, 0x400000 /* make it a quiet NaN */
901 /* Clear the sign bit of x. */
905 /* If x is zero, return zero. */
906 beqz a2, .Ldiv_return_zero
908 /* Normalize x. Adjust the exponent in a8. */
909 do_nsau a10, a2, a4, a5
918 /* Return zero with the appropriate sign bit. */
924 /* Set the sign bit of the result. */
928 /* If y is NaN or Inf, return NaN. */
930 movi a4, 0x400000 /* make it a quiet NaN */
935 /* If y is Infinity, return zero. */
937 beqz a8, .Ldiv_return_zero
938 /* y is NaN; return it. */
944 .type __divsf3, @function
949 /* Get the sign of the result. */
952 /* Check for NaN and infinity. */
953 ball a2, a6, .Ldiv_xnan_or_inf
954 ball a3, a6, .Ldiv_ynan_or_inf
956 /* Extract the exponents. */
960 beqz a9, .Ldiv_yexpzero
962 beqz a8, .Ldiv_xexpzero
965 /* Subtract the exponents. */
968 /* Replace sign/exponent fields with explicit "1.0". */
975 /* The first digit of the mantissa division must be a one.
976 Shift x (and adjust the exponent) as needed to make this true. */
981 /* Do the first subtraction and shift. */
985 /* Put the quotient into a10. */
988 /* Divide one bit at a time for 23 bits. */
991 loop a9, .Ldiv_loopend
994 /* Shift the quotient << 1. */
997 /* Is this digit a 0 or 1? */
1000 /* Output a 1 and subtract. */
1004 /* Shift the dividend << 1. */
1007 #if !XCHAL_HAVE_LOOPS
1013 /* Add the exponent bias (less one to account for the explicit "1.0"
1014 of the mantissa that will be added to the exponent in the final
1018 /* Check for over/underflow. The value in a8 is one less than the
1019 final exponent, so values in the range 0..fd are OK here. */
1021 bgeu a8, a4, .Ldiv_overflow
1024 /* Round. The remainder (<< 1) is in a2. */
1025 bltu a2, a3, .Ldiv_rounded
1027 beq a2, a3, .Ldiv_exactlyhalf
1030 /* Add the exponent to the mantissa. */
1035 /* Add the sign bit. */
1042 bltz a8, .Ldiv_underflow
1043 /* Return +/- Infinity. */
1044 addi a8, a4, 1 /* 0xff */
1049 /* Remainder is exactly half the divisor. Round even. */
1055 /* Create a subnormal value, where the exponent field contains zero,
1056 but the effective exponent is 1. The value of a8 is one less than
1057 the actual exponent, so just negate it to get the shift amount. */
1060 bgeui a8, 32, .Ldiv_flush_to_zero
1062 /* Shift a10 right. Any bits that are shifted out of a10 are
1063 saved in a6 for rounding the result. */
1067 /* Set the exponent to zero. */
1070 /* Pack any nonzero remainder (in a2) into a6. */
1075 /* Round a10 based on the bits shifted out into a6. */
1076 1: bgez a6, .Ldiv_rounded
1079 bnez a6, .Ldiv_rounded
1084 .Ldiv_flush_to_zero:
1085 /* Return zero with the appropriate sign bit. */
1090 #endif /* L_divsf3 */
/* NOTE(review): single-precision comparison entry points -- __eqsf2
   (aliased to __nesf2 via .set), __gtsf2, __lesf2, __gesf2, __ltsf2 and
   __unordsf2.  Each path checks explicitly for NaN operands so that NaN
   compares unordered (NaN != NaN); +0 and -0 must compare equal (see the
   "zero with different signs" check).  Code left byte-identical per the
   file's branch-displacement warning.  */
1094 /* Equal and Not Equal */
1099 .set __nesf2, __eqsf2
1100 .type __eqsf2, @function
1105 /* The values are equal but NaN != NaN. Check the exponent. */
1117 /* Check if the mantissas are nonzero. */
1121 /* Check if x and y are zero with different signs. */
1125 /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1126 or x when exponent(x) = 0x7f8 and x == y. */
1137 .type __gtsf2, @function
1142 1: bnall a3, a6, .Lle_cmp
1144 /* Check if y is a NaN. */
1150 /* Check if x is a NaN. */
1157 /* Less Than or Equal */
1161 .type __lesf2, @function
1166 1: bnall a3, a6, .Lle_cmp
1168 /* Check if y is a NaN. */
1174 /* Check if x is a NaN. */
1181 /* Check if x and y have different signs. */
1183 bltz a7, .Lle_diff_signs
1185 /* Check if x is negative. */
1188 /* Check if x <= y. */
1194 /* Check if y <= x. */
1202 /* Check if both x and y are zero. */
1211 /* Greater Than or Equal */
1215 .type __gesf2, @function
1220 1: bnall a3, a6, .Llt_cmp
1222 /* Check if y is a NaN. */
1228 /* Check if x is a NaN. */
1239 .type __ltsf2, @function
1244 1: bnall a3, a6, .Llt_cmp
1246 /* Check if y is a NaN. */
1252 /* Check if x is a NaN. */
1259 /* Check if x and y have different signs. */
1261 bltz a7, .Llt_diff_signs
1263 /* Check if x is negative. */
1266 /* Check if x < y. */
1272 /* Check if y < x. */
1280 /* Check if both x and y are nonzero. */
1293 .type __unordsf2, @function
1312 #endif /* L_cmpsf2 */
/* NOTE(review): __fixsfsi -- float to signed 32-bit int, truncating.
   Values with exponent out of the 0 < (exp - 0x7e) < 32 window saturate
   (too large -> maxint path, too small -> zero path); NaN is translated
   to +maxint, Infinity to +/-maxint by sign.  Code untouched.  */
1318 .type __fixsfsi, @function
1322 /* Check for NaN and Infinity. */
1324 ball a2, a6, .Lfixsfsi_nan_or_inf
1326 /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
1329 bgei a4, 32, .Lfixsfsi_maxint
1330 blti a4, 1, .Lfixsfsi_zero
1332 /* Add explicit "1.0" and shift << 8. */
1336 /* Shift back to the right, based on the exponent. */
1337 ssl a4 /* shift by 32 - a4 */
1340 /* Negate the result if sign != 0. */
1345 .Lfixsfsi_nan_or_inf:
1346 /* Handle Infinity and NaN. */
1348 beqz a4, .Lfixsfsi_maxint
1350 /* Translate NaN to +maxint. */
1354 slli a4, a6, 8 /* 0x80000000 */
1355 addi a5, a4, -1 /* 0x7fffffff */
1364 #endif /* L_fixsfsi */
/* NOTE(review): __fixsfdi -- float to signed 64-bit int, truncating.
   Same structure as __fixsfsi but with a 64-bit window (0 < exp - 0x7e
   < 64) and a two-register result (xh/xl); shifts of 32 or more take
   the smallshift path.  NaN translates to +maxint.  Code untouched.  */
1370 .type __fixsfdi, @function
1374 /* Check for NaN and Infinity. */
1376 ball a2, a6, .Lfixsfdi_nan_or_inf
1378 /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
1381 bgei a4, 64, .Lfixsfdi_maxint
1382 blti a4, 1, .Lfixsfdi_zero
1384 /* Add explicit "1.0" and shift << 8. */
1388 /* Shift back to the right, based on the exponent. */
1389 ssl a4 /* shift by 64 - a4 */
1390 bgei a4, 32, .Lfixsfdi_smallshift
1395 /* Negate the result if sign != 0. */
1403 .Lfixsfdi_smallshift:
1409 .Lfixsfdi_nan_or_inf:
1410 /* Handle Infinity and NaN. */
1412 beqz a4, .Lfixsfdi_maxint
1414 /* Translate NaN to +maxint. */
1418 slli a7, a6, 8 /* 0x80000000 */
1424 1: addi xh, a7, -1 /* 0x7fffffff */
1433 #endif /* L_fixsfdi */
/* NOTE(review): __fixunssfsi -- float to unsigned 32-bit int.  Window is
   0 <= (exp - 0x7f) < 32, with exp exactly 32 handled by the bigexp path
   (no shift needed); NaN translates to 0xffffffff and negative inputs
   are handled on the maxint path (see "Return 0x80000000 if negative").
   Code untouched.  */
1438 .global __fixunssfsi
1439 .type __fixunssfsi, @function
1443 /* Check for NaN and Infinity. */
1445 ball a2, a6, .Lfixunssfsi_nan_or_inf
1447 /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
1450 bgei a4, 32, .Lfixunssfsi_maxint
1451 bltz a4, .Lfixunssfsi_zero
1453 /* Add explicit "1.0" and shift << 8. */
1457 /* Shift back to the right, based on the exponent. */
1459 beqi a4, 32, .Lfixunssfsi_bigexp
1460 ssl a4 /* shift by 32 - a4 */
1463 /* Negate the result if sign != 0. */
1468 .Lfixunssfsi_nan_or_inf:
1469 /* Handle Infinity and NaN. */
1471 beqz a4, .Lfixunssfsi_maxint
1473 /* Translate NaN to 0xffffffff. */
1477 .Lfixunssfsi_maxint:
1478 slli a4, a6, 8 /* 0x80000000 */
1479 movi a5, -1 /* 0xffffffff */
1488 .Lfixunssfsi_bigexp:
1489 /* Handle unsigned maximum exponent case. */
1491 mov a2, a5 /* no shift needed */
1494 /* Return 0x80000000 if negative. */
1498 #endif /* L_fixunssfsi */
/* NOTE(review): __fixunssfdi -- float to unsigned 64-bit int.  Same
   scheme as __fixunssfsi widened to a 64-bit window (0 <= exp - 0x7f
   < 64) and an xh/xl register pair; exp exactly 64 takes the bigexp
   path, shifts >= 32 take smallshift.  NaN translates to all-ones.
   Code untouched.  */
1503 .global __fixunssfdi
1504 .type __fixunssfdi, @function
1508 /* Check for NaN and Infinity. */
1510 ball a2, a6, .Lfixunssfdi_nan_or_inf
1512 /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
1515 bgei a4, 64, .Lfixunssfdi_maxint
1516 bltz a4, .Lfixunssfdi_zero
1518 /* Add explicit "1.0" and shift << 8. */
1522 /* Shift back to the right, based on the exponent. */
1524 beqi a4, 64, .Lfixunssfdi_bigexp
1525 ssl a4 /* shift by 64 - a4 */
1526 bgei a4, 32, .Lfixunssfdi_smallshift
1530 .Lfixunssfdi_shifted:
1531 /* Negate the result if sign != 0. */
1539 .Lfixunssfdi_smallshift:
1543 j .Lfixunssfdi_shifted
1545 .Lfixunssfdi_nan_or_inf:
1546 /* Handle Infinity and NaN. */
1548 beqz a4, .Lfixunssfdi_maxint
1550 /* Translate NaN to 0xffffffff.... */
1555 .Lfixunssfdi_maxint:
1557 2: slli xh, a6, 8 /* 0x80000000 */
1566 .Lfixunssfdi_bigexp:
1567 /* Handle unsigned maximum exponent case. */
1570 abi_return /* no shift needed */
1572 #endif /* L_fixunssfdi */
/* NOTE(review): __floatunsisf / __floatsisf -- 32-bit int to float.
   The unsigned entry clears the sign and falls into the shared
   normalization path of the signed entry.  Normalizes via do_nsau
   (count leading zeros), builds exponent 0x9d - shift (0x7e + 31), and
   rounds to nearest, ties to even.  Code untouched.  */
1577 .global __floatunsisf
1578 .type __floatunsisf, @function
1581 beqz a2, .Lfloatsisf_return
1583 /* Set the sign to zero and jump to the floatsisf code. */
1585 j .Lfloatsisf_normalize
1589 .type __floatsisf, @function
1593 /* Check for zero. */
1594 beqz a2, .Lfloatsisf_return
1596 /* Save the sign. */
1599 /* Get the absolute value. */
1607 .Lfloatsisf_normalize:
1608 /* Normalize with the first 1 bit in the msb. */
1609 do_nsau a4, a2, a5, a6
1613 /* Shift the mantissa into position, with rounding bits in a6. */
1615 slli a6, a5, (32 - 8)
1617 /* Set the exponent. */
1618 movi a5, 0x9d /* 0x7e + 31 */
1627 /* Round up if the leftover fraction is >= 1/2. */
1628 bgez a6, .Lfloatsisf_return
1629 addi a2, a2, 1 /* Overflow to the exponent is OK. */
1631 /* Check if the leftover fraction is exactly 1/2. */
1633 beqz a6, .Lfloatsisf_exactlyhalf
1638 .Lfloatsisf_exactlyhalf:
1639 /* Round down to the nearest even value. */
1644 #endif /* L_floatsisf */
/* NOTE(review): __floatundisf / __floatdisf -- 64-bit int (xh/xl pair)
   to float.  Mirrors the 32-bit converters: normalize on xh (or on xl
   via the bigshift path when xh is zero), exponent base 0xbd
   (0x7e + 63), round to nearest with ties to even.  Code untouched.  */
1649 .global __floatundisf
1650 .type __floatundisf, @function
1654 /* Check for zero. */
1658 /* Set the sign to zero and jump to the floatdisf code. */
1660 j .Lfloatdisf_normalize
1664 .type __floatdisf, @function
1668 /* Check for zero. */
1672 /* Save the sign. */
1675 /* Get the absolute value. */
1676 bgez xh, .Lfloatdisf_normalize
1679 beqz xl, .Lfloatdisf_normalize
1682 .Lfloatdisf_normalize:
1683 /* Normalize with the first 1 bit in the msb of xh. */
1684 beqz xh, .Lfloatdisf_bigshift
1685 do_nsau a4, xh, a5, a6
1690 .Lfloatdisf_shifted:
1691 /* Shift the mantissa into position, with rounding bits in a6. */
1700 /* Set the exponent. */
1701 movi a5, 0xbd /* 0x7e + 63 */
1710 /* Round up if the leftover fraction is >= 1/2. */
1712 addi a2, a2, 1 /* Overflow to the exponent is OK. */
1714 /* Check if the leftover fraction is exactly 1/2. */
1716 beqz a6, .Lfloatdisf_exactlyhalf
1719 .Lfloatdisf_bigshift:
1720 /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
1721 do_nsau a4, xl, a5, a6
1726 j .Lfloatdisf_shifted
1728 .Lfloatdisf_exactlyhalf:
1729 /* Round down to the nearest even value. */
1734 #endif /* L_floatdisf */