1 /* ieee754-sf.S single-precision floating point support for ARM
3 Copyright (C) 2003-2013 Free Software Foundation, Inc.
4 Contributed by Nicolas Pitre (nico@cam.org)
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
28 * The goal of this code is to be as fast as possible. This is
29 * not meant to be easy to understand for the casual reader.
31 * Only the default rounding mode is supported, for best performance.
32 * Exceptions aren't supported yet, but they could be added quite easily
33 * if necessary without impacting performance.
@ negsf2 / __aeabi_fneg: single-precision negation.
@ IEEE-754 negation only toggles the sign bit; exponent and mantissa
@ (including NAN payloads) are left untouched, so a single EOR on
@ bit 31 of r0 is the whole operation.
@ NOTE(review): the ARM_FUNC_START/FUNC_END lines for negsf2 are elided
@ from this excerpt (embedded original line numbers jump 39 -> 41).
39 ARM_FUNC_ALIAS aeabi_fneg negsf2
41 eor r0, r0, #0x80000000 @ flip sign bit
49 #ifdef L_arm_addsubsf3
@ addsf3 / subsf3 (__aeabi_fadd / __aeabi_fsub / __aeabi_frsub):
@ single-precision add/subtract. Subtraction is implemented as addition
@ with one operand's sign flipped, so all entry points funnel into the
@ common code at label 1.
@ NOTE(review): this excerpt is non-contiguous (embedded original line
@ numbers jump); instructions between the visible lines are elided.
51 ARM_FUNC_START aeabi_frsub
@ frsub(a, b) = b - a: negate the first argument, then fall into fsub.
53 eor r0, r0, #0x80000000 @ flip sign bit of first arg
57 ARM_FUNC_ALIAS aeabi_fsub subsf3
@ fsub(a, b) = a + (-b): negate the second argument, then add.
59 eor r1, r1, #0x80000000 @ flip sign bit of second arg
60 #if defined(__INTERWORKING_STUBS__)
61 b 1f @ Skip Thumb-code prologue
65 ARM_FUNC_ALIAS aeabi_fadd addsf3
67 1: @ Look for zeroes, equal values, INF, or NAN.
@ lsl #1 drops the sign so the magnitude alone is tested; MVNS of
@ (value << 1) asr #24 is zero (EQ) exactly when exponent == 0xff,
@ i.e. the operand is INF or NAN.
70 COND(mov,s,ne) r3, r1, lsl #1
72 COND(mvn,s,ne) ip, r2, asr #24
73 COND(mvn,s,ne) ip, r3, asr #24
76 @ Compute exponent difference. Make largest exponent in r2,
77 @ corresponding arg in r0, and positive exponent difference in r3.
79 rsbs r3, r2, r3, lsr #24
88 @ If exponent difference is too large, return largest argument
89 @ already in r0. We need up to 25 bit to handle proper rounding
95 @ Convert mantissa to signed integer.
@ Insert the implicit leading 1 (bit 23) and strip the sign/exponent
@ fields (top 8 bits after the implicit bit is set).
97 orr r0, r0, #0x00800000
98 bic r0, r0, #0xff000000
102 orr r1, r1, #0x00800000
103 bic r1, r1, #0xff000000
107 @ If exponent == difference, one or both args were denormalized.
108 @ Since this is not common case, rescale them off line.
113 @ Compensate for the exponent overlapping the mantissa MSB added later
116 @ Shift and add second arg to first arg in r0.
117 @ Keep leftover bits into r1.
@ shiftop/shift1 are lib1funcs macros expanding to a shifted-operand
@ instruction (ARM) or separate shifts (Thumb); r1 receives the bits
@ shifted out of r1 so they can participate in rounding later.
118 shiftop adds r0 r0 r1 asr r3 ip
120 shift1 lsl, r1, r1, r3
122 @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
123 and r3, r0, #0x80000000
125 #if defined(__thumb2__)
@ Part of negating the 64-bit r0:r1 pair when the sum came out
@ negative (pairs with an elided NEGS of r1 — context not visible here).
127 sbc r0, r0, r0, lsl #1
133 @ Determine how to normalize the result.
140 @ Result needs to be shifted right.
145 @ Make sure we did not bust our exponent.
149 @ Our result is now properly aligned into r0, remaining bits in r1.
150 @ Pack final result together.
151 @ Round with MSB of r1. If halfway between two numbers, round towards
@ The ADC merges the biased exponent (r2 << 23) and folds the rounding
@ carry into the mantissa in one step.
155 adc r0, r0, r2, lsl #23
161 @ Result must be shifted left and exponent adjusted.
169 @ No rounding necessary since r1 will always be 0 at this point.
175 moveq r0, r0, lsl #12
195 shift1 lsl, r0, r0, ip
199 @ Final result with sign
200 @ If exponent negative, denormalize result.
202 addge r0, r0, r2, lsl #23
205 #if defined(__thumb2__)
210 orrlt r0, r3, r0, lsr r2
214 @ Fixup and adjust bit position for denormalized arguments.
215 @ Note that r2 must not remain equal to 0.
@ Toggling bit 23 undoes the implicit-1 insertion done above, which is
@ wrong for a denormal operand (true exponent field is 0).
218 eor r1, r1, #0x00800000
220 eoreq r0, r0, #0x00800000
230 COND(mvn,s,ne) ip, r3, asr #24
236 @ Result is x + 0.0 = x or 0.0 + y = y.
244 @ Result is x - x = 0.
249 @ Result is x + x = 2x.
@ Restore the sign bit when carry is set (sign shifted out by elided
@ doubling code — context not fully visible here).
254 orrcs r0, r0, #0x80000000
256 2: adds r2, r2, #(2 << 24)
258 addcc r0, r0, #(1 << 23)
260 and r3, r0, #0x80000000
262 @ Overflow: return INF.
@ Build +/-INF: sign from r3, exponent 0xff (0x7f8), mantissa zero.
264 orr r0, r3, #0x7f000000
265 orr r0, r0, #0x00800000
268 @ At least one of r0/r1 is INF/NAN.
269 @ if r0 != INF/NAN: return r1 (which is INF/NAN)
270 @ if r1 != INF/NAN: return r0 (which is INF/NAN)
271 @ if r0 or r1 is NAN: return NAN
272 @ if opposite sign: return NAN
273 @ otherwise return r0 (which is INF or -INF)
278 COND(mvn,s,eq) r3, r3, asr #24
282 COND(mov,s,eq) r3, r1, lsl #9
@ A NAN result is made quiet by setting the mantissa MSB (bit 22).
284 orrne r0, r0, #0x00400000 @ quiet NAN
@ floatunsisf/__aeabi_ui2f and floatsisf/__aeabi_i2f: convert the
@ 32-bit unsigned/signed integer in r0 to single precision.
@ NOTE(review): interior lines are elided here (original numbering
@ jumps); the normalization/packing code is not visible.
293 ARM_FUNC_START floatunsisf
294 ARM_FUNC_ALIAS aeabi_ui2f floatunsisf
299 ARM_FUNC_START floatsisf
300 ARM_FUNC_ALIAS aeabi_i2f floatsisf
@ Signed entry: capture the sign bit in r3 (Z/N flags also tell the
@ elided code whether to take the absolute value of r0).
302 ands r3, r0, #0x80000000
310 @ Add initial exponent to sign
@ 127 is the IEEE-754 single-precision bias; +23 accounts for the
@ mantissa width and is reduced as the value is normalized (elided).
311 orr r3, r3, #((127 + 23) << 23)
@ floatundisf/__aeabi_ul2f and floatdisf/__aeabi_l2f: convert a 64-bit
@ unsigned/signed integer to single precision. ah/al name the high/low
@ words of the argument — presumably macros defined elsewhere in this
@ file (definition not visible in this excerpt).
324 ARM_FUNC_START floatundisf
325 ARM_FUNC_ALIAS aeabi_ul2f floatundisf
334 ARM_FUNC_START floatdisf
335 ARM_FUNC_ALIAS aeabi_l2f floatdisf
341 ands r3, ah, #0x80000000 @ sign bit in r3
343 #if defined(__thumb2__)
@ Part of negating the 64-bit ah:al pair for negative inputs (pairs
@ with an elided NEGS of al — context not visible here).
345 sbc ah, ah, ah, lsl #1
357 @ Add initial exponent to sign
@ Bias 127, +23 for the mantissa width, +32 because the value spans
@ two words; trimmed below during normalization.
358 orr r3, r3, #((127 + 23 + 32) << 23)
@ High word zero: only 32 significant bits remain.
360 subeq r3, r3, #(32 << 23)
361 2: sub r3, r3, #(1 << 23)
@ Locate the leading one (software CLZ): probe halves and fold the
@ resulting shift count into r2 (some steps elided).
368 movhs ip, ip, lsr #16
381 sublo r2, r2, ip, lsr #1
382 subs r2, r2, ip, lsr #3
@ Fold the normalization shift r2 into the exponent field.
391 sub r3, r3, r2, lsl #23
@ Assemble the mantissa from ah:al; ip collects the bits shifted out
@ so the ADC can fold the rounding carry into the packed result.
394 shiftop add r3 r3 ah lsl r2 ip
395 shift1 lsl, ip, al, r2
398 shiftop adc r0 r3 al lsr r2 r2
404 shift1 lsl, ip, ah, r2
406 orrs al, al, ip, lsl #1
407 shiftop adc r0 r3 ah lsr r2 r2
@ Ties-to-even: on an exact halfway case (EQ) clear the LSB that the
@ rounding carry may have set.
409 biceq r0, r0, ip, lsr #31
417 #endif /* L_addsubsf3 */
419 #ifdef L_arm_muldivsf3
@ mulsf3 / __aeabi_fmul: single-precision multiplication.
@ NOTE(review): this excerpt is non-contiguous (embedded original line
@ numbers jump); instructions between the visible lines are elided.
421 ARM_FUNC_START mulsf3
422 ARM_FUNC_ALIAS aeabi_fmul mulsf3
424 @ Mask out exponents, trap any zero/denormal/INF/NAN.
@ ip is presumably a 0xff mask set by elided code; a zero or 0xff
@ exponent in either operand diverts to the special-case paths below.
426 ands r2, ip, r0, lsr #23
428 COND(and,s,ne) r3, ip, r1, lsr #23
434 @ Add exponents together
437 @ Determine final sign.
440 @ Convert mantissa to unsigned integer.
441 @ If power of two, branch to a separate path.
442 @ Make up for final alignment.
445 COND(mov,s,ne) r1, r1, lsl #9
448 orr r0, r3, r0, lsr #5
449 orr r1, r3, r1, lsr #5
453 @ Put sign bit in r3, which will be restored into r0 later.
454 and r3, ip, #0x80000000
456 @ Well, no way to make it shorter without the umull instruction.
@ Pre-UMULL path: split each mantissa into 16-bit halves (r4/r5 hold
@ the high halves per elided code) and accumulate partial products.
460 bic r0, r0, r4, lsl #16
461 bic r1, r1, r5, lsl #16
466 adds r3, r3, r0, lsl #16
467 adc r1, ip, r0, lsr #16
472 @ The actual multiplication.
475 @ Put final sign in r0.
476 and r0, ip, #0x80000000
480 @ Adjust result upon the MSB position.
@ If the product's MSB landed one bit low (CC), shift left by one and
@ pull the top bit of the low word up to keep full precision.
484 orrcc r1, r1, r3, lsr #31
487 @ Add sign to result.
490 @ Apply exponent bias, check for under/overflow.
495 @ Round the result, merge final exponent.
@ ADC merges the biased exponent and the rounding carry in one step.
497 adc r0, r0, r2, lsl #23
502 @ Multiplication by 0x1p*: let's shortcut a lot of code.
@ One operand is a power of two, so the result is the other operand's
@ mantissa with an adjusted exponent — no full multiply needed.
505 and ip, ip, #0x80000000
508 orr r0, ip, r0, lsr #9
509 orr r0, r0, r1, lsr #9
512 COND(rsb,s,gt) r3, r2, #255
513 orrgt r0, r0, r2, lsl #23
516 @ Under/overflow: fix things up for the code below.
517 orr r0, r0, #0x00800000
525 @ Check if denormalized result is possible, otherwise return signed 0.
528 bicle r0, r0, #0x7fffffff
531 @ Shift value right, round, etc.
534 shift1 lsr, r1, r1, r2
536 shift1 lsl, ip, r0, r2
@ Ties-to-even on the denormalized result: EQ means exactly halfway,
@ so clear the LSB the rounding carry may have set.
539 orrs r3, r3, ip, lsl #1
541 biceq r0, r0, ip, lsr #31
544 @ One or both arguments are denormalized.
545 @ Scale them leftwards and preserve sign bit.
548 and ip, r0, #0x80000000
551 tsteq r0, #0x00800000
556 and ip, r1, #0x80000000
559 tsteq r1, #0x00800000
566 @ Isolate the INF and NAN cases away
567 and r3, ip, r1, lsr #23
573 @ Here, one or more arguments are either denormalized or zero.
@ Strip sign bits: if either magnitude is zero the result is a signed
@ zero, otherwise normalize the denormals and resume.
574 bics ip, r0, #0x80000000
576 COND(bic,s,ne) ip, r1, #0x80000000
579 @ Result is 0, but determine sign anyway.
582 bic r0, r0, #0x7fffffff
585 1: @ One or both args are INF or NAN.
@ value << 1 == 0xff000000..0x80000000 boundary tests: an operand whose
@ doubled value is exactly 0x80000000 was a signed zero.
588 teqne r0, #0x80000000
591 teqne r1, #0x80000000
592 beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
596 bne LSYM(Lml_n) @ NAN * <anything> -> NAN
602 bne LSYM(Lml_n) @ <anything> * NAN -> NAN
604 @ Result is INF, but we need to determine its sign.
608 @ Overflow: return INF (sign already in r0).
610 and r0, r0, #0x80000000
611 orr r0, r0, #0x7f000000
612 orr r0, r0, #0x00800000
615 @ Return a quiet NAN.
@ Exponent 0xff with mantissa MSB (bit 22) set = default quiet NAN.
617 orr r0, r0, #0x7f000000
618 orr r0, r0, #0x00c00000
@ divsf3 / __aeabi_fdiv: single-precision division.
@ Shares its special-case tail labels (Lml_z/Lml_i/Lml_n) with mulsf3.
@ NOTE(review): this excerpt is non-contiguous (embedded original line
@ numbers jump); instructions between the visible lines are elided.
624 ARM_FUNC_START divsf3
625 ARM_FUNC_ALIAS aeabi_fdiv divsf3
627 @ Mask out exponents, trap any zero/denormal/INF/NAN.
629 ands r2, ip, r0, lsr #23
631 COND(and,s,ne) r3, ip, r1, lsr #23
637 @ Subtract divisor exponent from dividend's
640 @ Preserve final sign into ip.
643 @ Convert mantissa to unsigned integer.
644 @ Dividend -> r3, divisor -> r1.
649 orr r1, r3, r1, lsr #4
650 orr r3, r3, r0, lsr #4
652 @ Initialize r0 (result) with final sign bit.
653 and r0, ip, #0x80000000
655 @ Ensure result will land to known bit position.
656 @ Apply exponent bias accordingly.
@ 127 is the single-precision bias; the -2 compensates for the fixed
@ bit position chosen above (plus the carry folded in by ADC).
660 adc r2, r2, #(127 - 2)
662 @ The actual division loop.
@ Restoring shift-and-subtract division, producing several quotient
@ bits per iteration: each CS step subtracts a shifted divisor from
@ the remainder (r3) and sets the matching quotient bit (ip marks the
@ current bit position, moved down 4 bits per pass).
670 subcs r3, r3, r1, lsr #1
671 orrcs r0, r0, ip, lsr #1
674 subcs r3, r3, r1, lsr #2
675 orrcs r0, r0, ip, lsr #2
678 subcs r3, r3, r1, lsr #3
679 orrcs r0, r0, ip, lsr #3
682 COND(mov,s,ne) ip, ip, lsr #4
685 @ Check exponent for under/overflow.
689 @ Round the result, merge final exponent.
691 adc r0, r0, r2, lsl #23
696 @ Division by 0x1p*: let's shortcut a lot of code.
@ Divisor is a power of two: result is the dividend's mantissa with an
@ adjusted exponent — no division loop needed.
698 and ip, ip, #0x80000000
699 orr r0, ip, r0, lsr #9
702 COND(rsb,s,gt) r3, r2, #255
703 orrgt r0, r0, r2, lsl #23
706 orr r0, r0, #0x00800000
711 @ One or both arguments are denormalized.
712 @ Scale them leftwards and preserve sign bit.
715 and ip, r0, #0x80000000
718 tsteq r0, #0x00800000
723 and ip, r1, #0x80000000
726 tsteq r1, #0x00800000
732 @ One or both arguments are either INF, NAN, zero or denormalized.
734 and r3, ip, r1, lsr #23
738 bne LSYM(Lml_n) @ NAN / <anything> -> NAN
740 bne LSYM(Lml_i) @ INF / <anything> -> INF
742 b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
746 beq LSYM(Lml_z) @ <anything> / INF -> 0
748 b LSYM(Lml_n) @ <anything> / NAN -> NAN
749 2: @ If both are nonzero, we need to normalize and resume above.
750 bics ip, r0, #0x80000000
752 COND(bic,s,ne) ip, r1, #0x80000000
754 @ One or both arguments are zero.
755 bics r2, r0, #0x80000000
756 bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
757 bics r3, r1, #0x80000000
758 bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
759 b LSYM(Lml_n) @ 0 / 0 -> NAN
764 #endif /* L_muldivsf3 */
@ gtsf2/gesf2, ltsf2/lesf2, cmpsf2/nesf2/eqsf2: three-way single-
@ precision comparison. ip carries the value to return when the
@ operands are unordered (it is stacked at label 1 and reloaded at
@ label 5), which is how one body serves all the comparison flavours.
768 @ The return value in r0 is
770 @ 0 if the operands are equal
771 @ 1 if the first operand is greater than the second, or
772 @ the operands are unordered and the operation is
773 @ CMP, LT, LE, NE, or EQ.
774 @ -1 if the first operand is less than the second, or
775 @ the operands are unordered and the operation is GT
778 @ The Z flag will be set iff the operands are equal.
780 @ The following registers are clobbered by this function:
784 ARM_FUNC_ALIAS gesf2 gtsf2
789 ARM_FUNC_ALIAS lesf2 ltsf2
793 ARM_FUNC_START cmpsf2
794 ARM_FUNC_ALIAS nesf2 cmpsf2
795 ARM_FUNC_ALIAS eqsf2 cmpsf2
796 mov ip, #1 @ how should we specify unordered here?
@ Stash the unordered-return code; the common comparison body follows.
798 1: str ip, [sp, #-4]!
800 @ Trap any INF/NAN first.
@ MVNS of (value << 1) asr #24 is EQ exactly when exponent == 0xff.
805 COND(mvn,s,ne) ip, r3, asr #24
809 @ Note that 0.0 is equal to -0.0.
811 orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag
813 teqne r0, r1 @ if not 0 compare sign
815 COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0
@ Differing signs or differing magnitudes: derive -1/чего1 from the sign
@ of the first operand (r1 here holds the relevant sign word).
819 movhi r0, r1, asr #31
821 mvnlo r0, r1, asr #31
@ Labels 3/4 separate INF (mantissa zero) from NAN for each operand;
@ label 5 returns the stacked unordered code.
827 3: mvns ip, r2, asr #24
831 4: mvns ip, r3, asr #24
834 beq 2b @ r1 is not NAN
835 5: ldr r0, [sp], #4 @ return unordered code.
@ __aeabi_cfrcmple / __aeabi_cfcmpeq / __aeabi_cfcmple: flag-setting
@ comparison helpers. Instead of a value in r0 they return their
@ result in the CPSR (Z = equal, C = greater-or-unordered per the
@ comments below), which is why all general registers are preserved.
846 ARM_FUNC_START aeabi_cfrcmple
853 ARM_FUNC_START aeabi_cfcmpeq
854 ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
856 @ The status-returning routines are required to preserve all
857 @ registers except ip, lr, and cpsr.
@ Save caller registers (cmpsf2 clobbers some of them), run the
@ comparison, then translate the -1/0/1 result into flags.
858 6: do_push {r0, r1, r2, r3, lr}
860 @ Set the Z flag correctly, and the C flag unconditionally.
862 @ Clear the C flag if the return value was -1, indicating
863 @ that the first operand was smaller than the second.
866 RETLDM "r0, r1, r2, r3"
868 FUNC_END aeabi_cfcmple
869 FUNC_END aeabi_cfcmpeq
870 FUNC_END aeabi_cfrcmple
@ __aeabi_fcmpeq: boolean "equal" wrapper over the flag-setting
@ comparison; returns 1 in r0 iff the operands compare equal.
872 ARM_FUNC_START aeabi_fcmpeq
875 ARM_CALL aeabi_cfcmple
@ aeabi_cfcmple sets Z on equality (see the status-returning routines).
877 moveq r0, #1 @ Equal to.
878 movne r0, #0 @ Less than, greater than, or unordered.
881 FUNC_END aeabi_fcmpeq
@ __aeabi_fcmplt: boolean "less than" wrapper; returns 1 in r0 iff
@ the first operand is strictly less than the second.
883 ARM_FUNC_START aeabi_fcmplt
886 ARM_CALL aeabi_cfcmple
@ C clear means "first operand smaller" per the cfcmp flag contract.
888 movcc r0, #1 @ Less than.
889 movcs r0, #0 @ Equal to, greater than, or unordered.
892 FUNC_END aeabi_fcmplt
@ __aeabi_fcmple: boolean "less than or equal" wrapper; returns 1 in
@ r0 iff the first operand is less than or equal to the second.
894 ARM_FUNC_START aeabi_fcmple
897 ARM_CALL aeabi_cfcmple
@ LS = C clear or Z set = less-than or equal under the cfcmp contract.
899 movls r0, #1 @ Less than or equal to.
900 movhi r0, #0 @ Greater than or unordered.
903 FUNC_END aeabi_fcmple
@ __aeabi_fcmpge: boolean "greater than or equal" wrapper, built by
@ comparing with the operands reversed (cfrcmple).
905 ARM_FUNC_START aeabi_fcmpge
908 ARM_CALL aeabi_cfrcmple
910 movls r0, #1 @ Operand 2 is less than or equal to operand 1.
911 movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
914 FUNC_END aeabi_fcmpge
@ __aeabi_fcmpgt: boolean "greater than" wrapper, built by comparing
@ with the operands reversed (cfrcmple).
916 ARM_FUNC_START aeabi_fcmpgt
919 ARM_CALL aeabi_cfrcmple
921 movcc r0, #1 @ Operand 2 is less than operand 1.
922 movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
923 @ or they are unordered.
926 FUNC_END aeabi_fcmpgt
928 #endif /* L_cmpsf2 */
930 #ifdef L_arm_unordsf2
@ unordsf2 / __aeabi_fcmpun: returns 1 in r0 iff either argument is
@ NAN (operands unordered), 0 otherwise.
932 ARM_FUNC_START unordsf2
933 ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
@ MVNS of (value << 1) asr #24 is EQ exactly when exponent == 0xff;
@ the elided lines separate INF (ordered) from NAN (unordered).
941 1: mvns ip, r3, asr #24
945 2: mov r0, #0 @ arguments are ordered.
947 3: mov r0, #1 @ arguments are unordered.
950 FUNC_END aeabi_fcmpun
953 #endif /* L_unordsf2 */
@ fixsfsi / __aeabi_f2iz: convert single precision to signed 32-bit
@ integer, rounding toward zero (truncation).
957 ARM_FUNC_START fixsfsi
958 ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
960 @ check exponent range.
963 bcc 1f @ value is too small
@ r2 = right-shift amount derived from the exponent.
965 subs r2, r3, r2, lsr #24
966 bls 2f @ value is too large
@ Materialize the implicit leading 1 at bit 31, then shift down into
@ integer position; negation for the sign bit happens in elided code.
970 orr r3, r3, #0x80000000
971 tst r0, #0x80000000 @ the sign bit
972 shift1 lsr, r0, r3, r2
@ Overflow path: distinguish a genuinely out-of-range value from NAN.
980 2: cmp r2, #(127 + 31 - 0xff)
@ Saturate: ANDS leaves 0x80000000 (INT_MIN) for negative input and 0
@ for positive, which MOVEQ then turns into INT_MAX.
984 3: ands r0, r0, #0x80000000 @ the sign bit
986 moveq r0, #0x7fffffff @ the maximum signed positive si
989 4: mov r0, #0 @ What should we convert NAN to?
995 #endif /* L_fixsfsi */
997 #ifdef L_arm_fixunssfsi
@ fixunssfsi / __aeabi_f2uiz: convert single precision to unsigned
@ 32-bit integer, rounding toward zero. Negative and too-small inputs
@ yield 0; too-large inputs saturate to 0xffffffff; NAN yields 0.
999 ARM_FUNC_START fixunssfsi
1000 ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
1002 @ check exponent range.
1004 bcs 1f @ value is negative
@ Magnitudes below 1.0 (exponent < 127) truncate to zero.
1005 cmp r2, #(127 << 24)
1006 bcc 1f @ value is too small
@ r2 = right-shift amount derived from the exponent.
1008 subs r2, r3, r2, lsr #24
1009 bmi 2f @ value is too large
@ Materialize the implicit leading 1 at bit 31 and shift down into
@ integer position.
1013 orr r3, r3, #0x80000000
1014 shift1 lsr, r0, r3, r2
@ Overflow path: distinguish a genuinely out-of-range value from NAN.
1020 2: cmp r2, #(127 + 31 - 0xff)
1024 3: mov r0, #0xffffffff @ maximum unsigned si
1027 4: mov r0, #0 @ What should we convert NAN to?
1030 FUNC_END aeabi_f2uiz
1033 #endif /* L_fixunssfsi */