1 /* ieee754-sf.S single-precision floating point support for ARM
3 Copyright (C) 2003-2018 Free Software Foundation, Inc.
4 Contributed by Nicolas Pitre (nico@cam.org)
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
28 * The goal of this code is to be as fast as possible. This is
29 * not meant to be easy to understand for the casual reader.
31 * Only the default rounding mode is intended for best performances.
32 * Exceptions aren't supported yet, but that can be added quite easily
33 * if necessary without impacting performances.
35 * In the CFI related comments, 'previousOffset' refers to the previous offset
36 * from sp used to compute the CFA.
@ ----------------------------------------------------------------------
@ negsf2 / __aeabi_fneg: single-precision negation.
@ IEEE-754 negate is a pure sign-bit flip of the binary32 pattern in r0;
@ it applies equally to zeros, denormals, INFs and NaNs.
@ NOTE(review): this listing elides interior lines (the embedded source
@ line numbers jump), so the ARM_FUNC_START/return framing is not
@ visible here.
42 ARM_FUNC_ALIAS aeabi_fneg negsf2
45 eor r0, r0, #0x80000000 @ flip sign bit
54 #ifdef L_arm_addsubsf3
@ ----------------------------------------------------------------------
@ subsf3 / __aeabi_fsub / __aeabi_frsub and addsf3 / __aeabi_fadd:
@ single-precision add/subtract.  Subtraction is implemented by flipping
@ the sign of one operand and falling into the shared addition path.
@ NOTE(review): this listing elides interior lines (the embedded source
@ line numbers jump); do not assemble this excerpt as-is.
@ COND(op,s,cc), shiftop and shift1 are assembler macros defined
@ elsewhere in libgcc (presumably abstracting ARM vs Thumb-2 encodings
@ -- TODO confirm against lib1funcs.S).
56 ARM_FUNC_START aeabi_frsub
@ __aeabi_frsub computes (arg2 - arg1): negate the first argument and
@ reuse the ordinary subtract entry.
59 eor r0, r0, #0x80000000 @ flip sign bit of first arg
63 ARM_FUNC_ALIAS aeabi_fsub subsf3
@ Subtract = add with the second operand negated.
65 eor r1, r1, #0x80000000 @ flip sign bit of second arg
66 #if defined(__INTERWORKING_STUBS__)
67 b 1f @ Skip Thumb-code prologue
71 ARM_FUNC_ALIAS aeabi_fadd addsf3
73 1: @ Look for zeroes, equal values, INF, or NAN.
76 COND(mov,s,ne) r3, r1, lsl #1
@ mvns of (value << 1) asr #24 is zero iff the exponent field is all
@ ones, i.e. the operand is INF or NaN.
78 COND(mvn,s,ne) ip, r2, asr #24
79 COND(mvn,s,ne) ip, r3, asr #24
82 @ Compute exponent difference. Make largest exponent in r2,
83 @ corresponding arg in r0, and positive exponent difference in r3.
85 rsbs r3, r2, r3, lsr #24
94 @ If exponent difference is too large, return largest argument
95 @ already in r0. We need up to 25 bit to handle proper rounding
101 @ Convert mantissa to signed integer.
@ Set the implicit leading one (bit 23) and clear exponent/sign bits.
103 orr r0, r0, #0x00800000
104 bic r0, r0, #0xff000000
108 orr r1, r1, #0x00800000
109 bic r1, r1, #0xff000000
113 @ If exponent == difference, one or both args were denormalized.
114 @ Since this is not common case, rescale them off line.
119 @ Compensate for the exponent overlapping the mantissa MSB added later
122 @ Shift and add second arg to first arg in r0.
123 @ Keep leftover bits into r1.
124 shiftop adds r0 r0 r1 asr r3 ip
126 shift1 lsl, r1, r1, r3
128 @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
129 and r3, r0, #0x80000000
131 #if defined(__thumb2__)
@ r0 = r0 - (r0 << 1) - borrow: two's-complement negate of the high
@ part (the low-word negate that produces the borrow is elided here).
133 sbc r0, r0, r0, lsl #1
139 @ Determine how to normalize the result.
146 @ Result needs to be shifted right.
151 @ Make sure we did not bust our exponent.
155 @ Our result is now properly aligned into r0, remaining bits in r1.
156 @ Pack final result together.
157 @ Round with MSB of r1. If halfway between two numbers, round towards
@ (continuation elided; presumably "even" -- adc folds the rounding
@ carry into the packed exponent+mantissa.)
161 adc r0, r0, r2, lsl #23
167 @ Result must be shifted left and exponent adjusted.
175 @ No rounding necessary since r1 will always be 0 at this point.
181 moveq r0, r0, lsl #12
201 shift1 lsl, r0, r0, ip
205 @ Final result with sign
206 @ If exponent negative, denormalize result.
208 addge r0, r0, r2, lsl #23
211 #if defined(__thumb2__)
216 orrlt r0, r3, r0, lsr r2
220 @ Fixup and adjust bit position for denormalized arguments.
221 @ Note that r2 must not remain equal to 0.
224 eor r1, r1, #0x00800000
226 eoreq r0, r0, #0x00800000
236 COND(mvn,s,ne) ip, r3, asr #24
242 @ Result is x + 0.0 = x or 0.0 + y = y.
250 @ Result is x - x = 0.
255 @ Result is x + x = 2x.
@ NOTE(review): sign restored when carry is set -- the shift producing
@ that carry is elided in this listing.
260 orrcs r0, r0, #0x80000000
262 2: adds r2, r2, #(2 << 24)
264 addcc r0, r0, #(1 << 23)
266 and r3, r0, #0x80000000
268 @ Overflow: return INF.
@ 0x7f800000 built from two ORs, each encodable as an ARM immediate.
270 orr r0, r3, #0x7f000000
271 orr r0, r0, #0x00800000
274 @ At least one of r0/r1 is INF/NAN.
275 @ if r0 != INF/NAN: return r1 (which is INF/NAN)
276 @ if r1 != INF/NAN: return r0 (which is INF/NAN)
277 @ if r0 or r1 is NAN: return NAN
278 @ if opposite sign: return NAN
279 @ otherwise return r0 (which is INF or -INF)
284 COND(mvn,s,eq) r3, r3, asr #24
288 COND(mov,s,eq) r3, r1, lsl #9
@ Setting mantissa bit 22 makes the NaN quiet (binary32 qNaN encoding).
290 orrne r0, r0, #0x00400000 @ quiet NAN
@ ----------------------------------------------------------------------
@ floatunsisf / __aeabi_ui2f and floatsisf / __aeabi_i2f: convert a
@ 32-bit (unsigned resp. signed) integer in r0 to single precision.
@ NOTE(review): interior lines are elided in this listing.
300 ARM_FUNC_START floatunsisf
301 ARM_FUNC_ALIAS aeabi_ui2f floatunsisf
307 ARM_FUNC_START floatsisf
308 ARM_FUNC_ALIAS aeabi_i2f floatsisf
@ Extract the sign into r3; the flags let the (elided) signed path
@ decide whether to negate the magnitude.
310 ands r3, r0, #0x80000000
318 @ Add initial exponent to sign
@ 127 is the binary32 bias; +23 accounts for treating the integer as a
@ mantissa aligned at bit 23 before normalization.
319 orr r3, r3, #((127 + 23) << 23)
@ ----------------------------------------------------------------------
@ floatundisf / __aeabi_ul2f and floatdisf / __aeabi_l2f: convert a
@ 64-bit (unsigned resp. signed) integer to single precision.  The
@ operand lives in the ah:al register pair (names presumably mapped to
@ r0/r1 per endianness elsewhere in libgcc -- TODO confirm).
@ NOTE(review): interior lines are elided in this listing.
333 ARM_FUNC_START floatundisf
334 ARM_FUNC_ALIAS aeabi_ul2f floatundisf
344 ARM_FUNC_START floatdisf
345 ARM_FUNC_ALIAS aeabi_l2f floatdisf
351 ands r3, ah, #0x80000000 @ sign bit in r3
353 #if defined(__thumb2__)
@ Negate the high word with borrow (part of a 64-bit two's-complement
@ negate; the low-word step is elided here).
355 sbc ah, ah, ah, lsl #1
367 @ Add initial exponent to sign
@ Bias 127, +23 for mantissa alignment, +32 for the high word.
368 orr r3, r3, #((127 + 23 + 32) << 23)
@ High word zero: only 32 bits contribute, drop 32 from the exponent.
370 subeq r3, r3, #(32 << 23)
371 2: sub r3, r3, #(1 << 23)
@ NOTE(review): the halved shifts below look like a binary search for
@ the leading set bit (software CLZ) -- surrounding steps elided.
378 movhs ip, ip, lsr #16
391 sublo r2, r2, ip, lsr #1
392 subs r2, r2, ip, lsr #3
@ Fold the normalization shift count (r2) into the exponent field.
401 sub r3, r3, r2, lsl #23
404 shiftop add r3 r3 ah lsl r2 ip
405 shift1 lsl, ip, al, r2
@ adc rounds to nearest with the bit shifted out into the carry.
408 shiftop adc r0 r3 al lsr r2 r2
414 shift1 lsl, ip, ah, r2
416 orrs al, al, ip, lsl #1
417 shiftop adc r0 r3 ah lsr r2 r2
@ On an exact tie (Z set), clear the rounding bit: round-to-even.
419 biceq r0, r0, ip, lsr #31
428 #endif /* L_arm_addsubsf3 */
430 #ifdef L_arm_muldivsf3
@ ----------------------------------------------------------------------
@ mulsf3 / __aeabi_fmul: single-precision multiplication.
@ NOTE(review): this listing elides interior lines (the embedded source
@ line numbers jump); several instructions between the visible ones are
@ missing.
432 ARM_FUNC_START mulsf3
433 ARM_FUNC_ALIAS aeabi_fmul mulsf3
436 @ Mask out exponents, trap any zero/denormal/INF/NAN.
438 ands r2, ip, r0, lsr #23
440 COND(and,s,ne) r3, ip, r1, lsr #23
446 @ Add exponents together
449 @ Determine final sign.
452 @ Convert mantissa to unsigned integer.
453 @ If power of two, branch to a separate path.
454 @ Make up for final alignment.
457 COND(mov,s,ne) r1, r1, lsl #9
460 orr r0, r3, r0, lsr #5
461 orr r1, r3, r1, lsr #5
465 @ Put sign bit in r3, which will be restored into r0 later.
466 and r3, ip, #0x80000000
468 @ Well, no way to make it shorter without the umull instruction.
@ Software 32x32->64 multiply path for cores without umull: operands
@ are split into 16-bit halves (r4/r5 presumably hold the high halves
@ -- the splitting lines are elided) and four partial products combined.
469 do_push {r3, r4, r5} @ sp -= 12
470 .cfi_remember_state @ Save the current CFI state
471 .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
472 .cfi_rel_offset r3, 0 @ Registers are saved from sp to sp + 8
473 .cfi_rel_offset r4, 4
474 .cfi_rel_offset r5, 8
478 bic r0, r0, r4, lsl #16
479 bic r1, r1, r5, lsl #16
484 adds r3, r3, r0, lsl #16
485 adc r1, ip, r0, lsr #16
486 do_pop {r0, r4, r5} @ sp += 12
487 .cfi_restore_state @ Restore the previous CFI state
491 @ The actual multiplication.
494 @ Put final sign in r0.
495 and r0, ip, #0x80000000
499 @ Adjust result upon the MSB position.
503 orrcc r1, r1, r3, lsr #31
506 @ Add sign to result.
509 @ Apply exponent bias, check for under/overflow.
514 @ Round the result, merge final exponent.
@ adc folds the rounding carry into the packed exponent+mantissa.
516 adc r0, r0, r2, lsl #23
521 @ Multiplication by 0x1p*: let's shortcut a lot of code.
524 and ip, ip, #0x80000000
527 orr r0, ip, r0, lsr #9
528 orr r0, r0, r1, lsr #9
531 COND(rsb,s,gt) r3, r2, #255
532 orrgt r0, r0, r2, lsl #23
535 @ Under/overflow: fix things up for the code below.
536 orr r0, r0, #0x00800000
544 @ Check if denormalized result is possible, otherwise return signed 0.
547 bicle r0, r0, #0x7fffffff
550 @ Shift value right, round, etc.
553 shift1 lsr, r1, r1, r2
555 shift1 lsl, ip, r0, r2
558 orrs r3, r3, ip, lsl #1
@ On an exact tie, clear the rounding bit: round-to-even.
560 biceq r0, r0, ip, lsr #31
563 @ One or both arguments are denormalized.
564 @ Scale them leftwards and preserve sign bit.
567 and ip, r0, #0x80000000
570 tsteq r0, #0x00800000
575 and ip, r1, #0x80000000
578 tsteq r1, #0x00800000
585 @ Isolate the INF and NAN cases away
586 and r3, ip, r1, lsr #23
592 @ Here, one or more arguments are either denormalized or zero.
593 bics ip, r0, #0x80000000
595 COND(bic,s,ne) ip, r1, #0x80000000
598 @ Result is 0, but determine sign anyway.
601 bic r0, r0, #0x7fffffff
604 1: @ One or both args are INF or NAN.
607 teqne r0, #0x80000000
610 teqne r1, #0x80000000
611 beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
615 bne LSYM(Lml_n) @ NAN * <anything> -> NAN
621 bne LSYM(Lml_n) @ <anything> * NAN -> NAN
623 @ Result is INF, but we need to determine its sign.
627 @ Overflow: return INF (sign already in r0).
@ 0x7f800000 built from two ORs, each encodable as an ARM immediate.
629 and r0, r0, #0x80000000
630 orr r0, r0, #0x7f000000
631 orr r0, r0, #0x00800000
634 @ Return a quiet NAN.
@ 0x7f000000 | 0x00c00000 = 0x7fc00000: all-ones exponent plus mantissa
@ MSB, the default qNaN pattern.
636 orr r0, r0, #0x7f000000
637 orr r0, r0, #0x00c00000
@ ----------------------------------------------------------------------
@ divsf3 / __aeabi_fdiv: single-precision division via a shift-and-
@ subtract loop producing several quotient bits per iteration.
@ NOTE(review): this listing elides interior lines (the embedded source
@ line numbers jump).
644 ARM_FUNC_START divsf3
645 ARM_FUNC_ALIAS aeabi_fdiv divsf3
648 @ Mask out exponents, trap any zero/denormal/INF/NAN.
650 ands r2, ip, r0, lsr #23
652 COND(and,s,ne) r3, ip, r1, lsr #23
658 @ Subtract divisor exponent from dividend's
661 @ Preserve final sign into ip.
664 @ Convert mantissa to unsigned integer.
665 @ Dividend -> r3, divisor -> r1.
670 orr r1, r3, r1, lsr #4
671 orr r3, r3, r0, lsr #4
673 @ Initialize r0 (result) with final sign bit.
674 and r0, ip, #0x80000000
676 @ Ensure result will land to known bit position.
677 @ Apply exponent bias accordingly.
681 adc r2, r2, #(127 - 2)
683 @ The actual division loop.
@ Each conditional pair subtracts a scaled divisor from the remainder
@ (r3) and sets the matching quotient bit; ip is presumably the moving
@ quotient-bit mask (its setup is elided here).
691 subcs r3, r3, r1, lsr #1
692 orrcs r0, r0, ip, lsr #1
695 subcs r3, r3, r1, lsr #2
696 orrcs r0, r0, ip, lsr #2
699 subcs r3, r3, r1, lsr #3
700 orrcs r0, r0, ip, lsr #3
703 COND(mov,s,ne) ip, ip, lsr #4
706 @ Check exponent for under/overflow.
710 @ Round the result, merge final exponent.
712 adc r0, r0, r2, lsl #23
717 @ Division by 0x1p*: let's shortcut a lot of code.
719 and ip, ip, #0x80000000
720 orr r0, ip, r0, lsr #9
723 COND(rsb,s,gt) r3, r2, #255
724 orrgt r0, r0, r2, lsl #23
727 orr r0, r0, #0x00800000
732 @ One or both arguments are denormalized.
733 @ Scale them leftwards and preserve sign bit.
736 and ip, r0, #0x80000000
739 tsteq r0, #0x00800000
744 and ip, r1, #0x80000000
747 tsteq r1, #0x00800000
753 @ One or both arguments are either INF, NAN, zero or denormalized.
755 and r3, ip, r1, lsr #23
759 bne LSYM(Lml_n) @ NAN / <anything> -> NAN
761 bne LSYM(Lml_i) @ INF / <anything> -> INF
763 b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
767 beq LSYM(Lml_z) @ <anything> / INF -> 0
769 b LSYM(Lml_n) @ <anything> / NAN -> NAN
770 2: @ If both are nonzero, we need to normalize and resume above.
771 bics ip, r0, #0x80000000
773 COND(bic,s,ne) ip, r1, #0x80000000
775 @ One or both arguments are zero.
776 bics r2, r0, #0x80000000
777 bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
778 bics r3, r1, #0x80000000
779 bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
780 b LSYM(Lml_n) @ 0 / 0 -> NAN
786 #endif /* L_arm_muldivsf3 */
@ ----------------------------------------------------------------------
@ cmpsf2 / gtsf2 / ltsf2 (and the eq/ne/ge/le aliases): three-way
@ single-precision comparison returning -1 / 0 / 1 in r0.
@ NOTE(review): interior lines are elided in this listing.
790 @ The return value in r0 is
792 @ 0 if the operands are equal
793 @ 1 if the first operand is greater than the second, or
794 @ the operands are unordered and the operation is
795 @ CMP, LT, LE, NE, or EQ.
796 @ -1 if the first operand is less than the second, or
797 @ the operands are unordered and the operation is GT
800 @ The Z flag will be set iff the operands are equal.
802 @ The following registers are clobbered by this function:
@ (clobber list elided in this listing)
806 ARM_FUNC_ALIAS gesf2 gtsf2
812 ARM_FUNC_ALIAS lesf2 ltsf2
816 ARM_FUNC_START cmpsf2
817 ARM_FUNC_ALIAS nesf2 cmpsf2
818 ARM_FUNC_ALIAS eqsf2 cmpsf2
@ ip holds the value to return if the operands turn out unordered; it
@ is stashed on the stack and reloaded at label 5 below.
819 mov ip, #1 @ how should we specify unordered here?
821 1: str ip, [sp, #-4]!
822 .cfi_adjust_cfa_offset 4 @ CFA is now sp + previousOffset + 4.
823 @ We're not adding CFI for ip as it's pushed into the stack only because
824 @ it may be popped off later as a return value (i.e. we're not preserving
@ its original value).
827 @ Trap any INF/NAN first.
832 COND(mvn,s,ne) ip, r3, asr #24
835 @ Save the current CFI state. This is done because the branch is conditional,
836 @ and if we don't take it we'll issue a .cfi_adjust_cfa_offset and return.
837 @ If we do take it, however, the .cfi_adjust_cfa_offset from the non-branch
838 @ code will affect the branch code as well. To avoid this we'll restore
839 @ the current state before executing the branch code.
842 @ Note that 0.0 is equal to -0.0.
844 .cfi_adjust_cfa_offset -4 @ CFA is now sp + previousOffset.
846 orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag
848 teqne r0, r1 @ if not 0 compare sign
850 COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0
@ Signs differ: result derived from the second operand's sign bit
@ (the follow-up lines completing +/-1 are elided in this listing).
854 movhi r0, r1, asr #31
856 mvnlo r0, r1, asr #31
863 @ Restore the previous CFI state (i.e. keep the CFI state as it was
864 @ before the branch).
871 4: mvns ip, r3, asr #24
874 beq 2b @ r1 is not NAN
876 5: ldr r0, [sp], #4 @ return unordered code.
877 .cfi_adjust_cfa_offset -4 @ CFA is now sp + previousOffset.
@ ----------------------------------------------------------------------
@ __aeabi_cfrcmple / __aeabi_cfcmpeq / __aeabi_cfcmple: comparisons
@ that return their result in the CPSR flags (not in r0), per the ARM
@ run-time ABI.
@ NOTE(review): interior lines are elided in this listing.
889 ARM_FUNC_START aeabi_cfrcmple
897 ARM_FUNC_START aeabi_cfcmpeq
898 ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
900 @ The status-returning routines are required to preserve all
901 @ registers except ip, lr, and cpsr.
902 6: do_push {r0, r1, r2, r3, lr}
903 .cfi_adjust_cfa_offset 20 @ CFA is at sp + previousOffset + 20
904 .cfi_rel_offset r0, 0 @ Registers are saved from sp to sp + 16
905 .cfi_rel_offset r1, 4
906 .cfi_rel_offset r2, 8
907 .cfi_rel_offset r3, 12
908 .cfi_rel_offset lr, 16
911 @ Set the Z flag correctly, and the C flag unconditionally.
913 @ Clear the C flag if the return value was -1, indicating
914 @ that the first operand was smaller than the second.
@ RETLDM (macro defined elsewhere in libgcc) restores the listed
@ registers and returns.
917 RETLDM "r0, r1, r2, r3"
920 FUNC_END aeabi_cfcmple
921 FUNC_END aeabi_cfcmpeq
922 FUNC_END aeabi_cfrcmple
@ ----------------------------------------------------------------------
@ __aeabi_fcmpeq: boolean equality test (r0 = 1 if equal, else 0),
@ built on the flag-setting __aeabi_cfcmple helper.
924 ARM_FUNC_START aeabi_fcmpeq
@ 8-byte decrement while saving only lr -- presumably keeps sp 8-byte
@ aligned per AAPCS; TODO confirm.
927 str lr, [sp, #-8]! @ sp -= 8
928 .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
929 .cfi_rel_offset lr, 0 @ lr is at sp
931 ARM_CALL aeabi_cfcmple
@ Z flag from the helper selects the boolean result.
933 moveq r0, #1 @ Equal to.
934 movne r0, #0 @ Less than, greater than, or unordered.
938 FUNC_END aeabi_fcmpeq
@ ----------------------------------------------------------------------
@ __aeabi_fcmplt: boolean less-than test (r0 = 1 if arg1 < arg2, else
@ 0), built on the flag-setting __aeabi_cfcmple helper.
940 ARM_FUNC_START aeabi_fcmplt
@ 8-byte decrement while saving only lr -- presumably keeps sp 8-byte
@ aligned per AAPCS; TODO confirm.
943 str lr, [sp, #-8]! @ sp -= 8
944 .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
945 .cfi_rel_offset lr, 0 @ lr is at sp
947 ARM_CALL aeabi_cfcmple
@ C flag from the helper selects the boolean result.
949 movcc r0, #1 @ Less than.
950 movcs r0, #0 @ Equal to, greater than, or unordered.
954 FUNC_END aeabi_fcmplt
@ ----------------------------------------------------------------------
@ __aeabi_fcmple: boolean less-than-or-equal test (r0 = 1 if
@ arg1 <= arg2, else 0), built on the flag-setting __aeabi_cfcmple.
956 ARM_FUNC_START aeabi_fcmple
@ 8-byte decrement while saving only lr -- presumably keeps sp 8-byte
@ aligned per AAPCS; TODO confirm.
959 str lr, [sp, #-8]! @ sp -= 8
960 .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
961 .cfi_rel_offset lr, 0 @ lr is at sp
963 ARM_CALL aeabi_cfcmple
@ C and Z flags from the helper select the boolean result.
965 movls r0, #1 @ Less than or equal to.
966 movhi r0, #0 @ Greater than or unordered.
970 FUNC_END aeabi_fcmple
@ ----------------------------------------------------------------------
@ __aeabi_fcmpge: boolean greater-than-or-equal test (r0 = 1 if
@ arg1 >= arg2, else 0).  Uses the REVERSED flag-setting compare
@ __aeabi_cfrcmple, so "operand 2 <= operand 1" means arg1 >= arg2.
972 ARM_FUNC_START aeabi_fcmpge
@ 8-byte decrement while saving only lr -- presumably keeps sp 8-byte
@ aligned per AAPCS; TODO confirm.
975 str lr, [sp, #-8]! @ sp -= 8
976 .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
977 .cfi_rel_offset lr, 0 @ lr is at sp
979 ARM_CALL aeabi_cfrcmple
981 movls r0, #1 @ Operand 2 is less than or equal to operand 1.
982 movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
986 FUNC_END aeabi_fcmpge
@ ----------------------------------------------------------------------
@ __aeabi_fcmpgt: boolean greater-than test (r0 = 1 if arg1 > arg2,
@ else 0).  Uses the REVERSED flag-setting compare __aeabi_cfrcmple,
@ so "operand 2 < operand 1" means arg1 > arg2.
988 ARM_FUNC_START aeabi_fcmpgt
@ 8-byte decrement while saving only lr -- presumably keeps sp 8-byte
@ aligned per AAPCS; TODO confirm.
991 str lr, [sp, #-8]! @ sp -= 8
992 .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
993 .cfi_rel_offset lr, 0 @ lr is at sp
995 ARM_CALL aeabi_cfrcmple
997 movcc r0, #1 @ Operand 2 is less than operand 1.
998 movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
999 @ or they are unordered.
1003 FUNC_END aeabi_fcmpgt
1005 #endif /* L_cmpsf2 */
1007 #ifdef L_arm_unordsf2
@ ----------------------------------------------------------------------
@ unordsf2 / __aeabi_fcmpun: return 1 in r0 if either operand is NaN
@ (i.e. the pair is unordered), 0 otherwise.
@ NOTE(review): interior lines are elided in this listing.
1009 ARM_FUNC_START unordsf2
1010 ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
@ mvns of (value << 1) asr #24 is zero iff the exponent field is all
@ ones (INF/NaN); the mantissa checks distinguishing NaN from INF are
@ elided here.
1015 mvns ip, r2, asr #24
1019 1: mvns ip, r3, asr #24
1023 2: mov r0, #0 @ arguments are ordered.
1025 3: mov r0, #1 @ arguments are unordered.
1029 FUNC_END aeabi_fcmpun
1032 #endif /* L_arm_unordsf2 */
1034 #ifdef L_arm_fixsfsi
@ ----------------------------------------------------------------------
@ fixsfsi / __aeabi_f2iz: convert single precision to signed 32-bit
@ integer, truncating toward zero.  Out-of-range values clamp to
@ 0x7fffffff (positive) or 0x80000000 (negative); NaN converts to 0.
@ NOTE(review): interior lines are elided in this listing.
1036 ARM_FUNC_START fixsfsi
1037 ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
1040 @ check exponent range.
@ r2 presumably holds the input shifted left one bit (sign dropped) --
@ set on an elided line.  Exponent below 127 means |x| < 1 -> 0.
1042 cmp r2, #(127 << 24)
1043 bcc 1f @ value is too small
1045 subs r2, r3, r2, lsr #24
1046 bls 2f @ value is too large
@ Materialize the implicit leading one above the mantissa bits.
1050 orr r3, r3, #0x80000000
1051 tst r0, #0x80000000 @ the sign bit
1052 shift1 lsr, r0, r3, r2
@ Distinguish plain overflow from NaN inputs.
1060 2: cmp r2, #(127 + 31 - 0xff)
@ If negative, r0 stays 0x80000000 (INT_MIN); if positive (Z set),
@ saturate to INT_MAX.
1064 3: ands r0, r0, #0x80000000 @ the sign bit
1066 moveq r0, #0x7fffffff @ the maximum signed positive si
1069 4: mov r0, #0 @ What should we convert NAN to?
1076 #endif /* L_arm_fixsfsi */
1078 #ifdef L_arm_fixunssfsi
@ ----------------------------------------------------------------------
@ fixunssfsi / __aeabi_f2uiz: convert single precision to unsigned
@ 32-bit integer, truncating toward zero.  Negative and too-small
@ inputs take the common 1f path (target elided; presumably yields 0);
@ too-large inputs clamp to 0xffffffff; NaN converts to 0.
@ NOTE(review): interior lines are elided in this listing.
1080 ARM_FUNC_START fixunssfsi
1081 ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
1084 @ check exponent range.
@ The carry deciding "negative" is set by an elided instruction above.
1086 bcs 1f @ value is negative
1087 cmp r2, #(127 << 24)
1088 bcc 1f @ value is too small
1090 subs r2, r3, r2, lsr #24
1091 bmi 2f @ value is too large
@ Materialize the implicit leading one above the mantissa bits.
1095 orr r3, r3, #0x80000000
1096 shift1 lsr, r0, r3, r2
@ Distinguish plain overflow from NaN inputs.
1102 2: cmp r2, #(127 + 31 - 0xff)
1106 3: mov r0, #0xffffffff @ maximum unsigned si
1109 4: mov r0, #0 @ What should we convert NAN to?
1113 FUNC_END aeabi_f2uiz
1116 #endif /* L_arm_fixunssfsi */