1 /* ieee754-sf.S single-precision floating point support for ARM
3 Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
4 Contributed by Nicolas Pitre (nico@cam.org)
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 In addition to the permissions in the GNU General Public License, the
12 Free Software Foundation gives you unlimited permission to link the
13 compiled version of this file into combinations with other programs,
14 and to distribute those combinations without any restriction coming
15 from the use of this file. (The General Public License restrictions
16 do apply in other respects; for example, they cover modification of
17 the file, and distribution when not linked into a combine
20 This file is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; see the file COPYING. If not, write to
27 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
28 Boston, MA 02110-1301, USA. */
33 * The goal of this code is to be as fast as possible. This is
34 * not meant to be easy to understand for the casual reader.
36 * Only the default rounding mode is intended, for best performance.
37 * Exceptions aren't supported yet, but that can be added quite easily
38 * if necessary without impacting performance.
@ negsf2 / aeabi_fneg: negate the single-precision value held in r0 by
@ toggling bit 31 (its sign bit).  The result is left in r0.
@ NOTE(review): the embedded line numbering has gaps around this routine,
@ so its entry macro and return sequence are not visible in this excerpt.
44 ARM_FUNC_ALIAS aeabi_fneg negsf2
46 eor r0, r0, #0x80000000 @ flip sign bit
@ Single-precision add/subtract family (operands in r0 and r1, result in r0).
@   aeabi_frsub        toggles the sign of the first argument (r0).
@   subsf3/aeabi_fsub  toggles the sign of the second argument (r1).
@   addsf3/aeabi_fadd  label 1 below starts the addition code proper, which
@                      first screens for zeroes, equal values, INF and NAN.
@ NOTE(review): the embedded line numbering has gaps, so many instructions
@ (including branches, returns and the #else/#endif lines matching the
@ conditionals opened here) are not visible in this excerpt.
56 ARM_FUNC_START aeabi_frsub
58 eor r0, r0, #0x80000000 @ flip sign bit of first arg
62 ARM_FUNC_ALIAS aeabi_fsub subsf3
64 eor r1, r1, #0x80000000 @ flip sign bit of second arg
65 #if defined(__INTERWORKING_STUBS__)
66 b 1f @ Skip Thumb-code prologue
70 ARM_FUNC_ALIAS aeabi_fadd addsf3
72 1: @ Look for zeroes, equal values, INF, or NAN.
75 COND(mov,s,ne) r3, r1, lsl #1
77 COND(mvn,s,ne) ip, r2, asr #24
78 COND(mvn,s,ne) ip, r3, asr #24
81 @ Compute exponent difference. Make largest exponent in r2,
82 @ corresponding arg in r0, and positive exponent difference in r3.
84 rsbs r3, r2, r3, lsr #24
93 @ If exponent difference is too large, return largest argument
94 @ already in r0. We need up to 25 bits to handle proper rounding
100 @ Convert mantissa to signed integer.
102 orr r0, r0, #0x00800000 @ set bit 23 (the leading mantissa bit)
103 bic r0, r0, #0xff000000 @ clear bits 31..24 (sign and exponent bits)
107 orr r1, r1, #0x00800000 @ same treatment for the second operand
108 bic r1, r1, #0xff000000
112 @ If exponent == difference, one or both args were denormalized.
113 @ Since this is not common case, rescale them off line.
118 @ Compensate for the exponent overlapping the mantissa MSB added later
121 @ Shift and add second arg to first arg in r0.
122 @ Keep leftover bits into r1.
123 shiftop adds r0 r0 r1 asr r3 ip
125 shift1 lsl, r1, r1, r3
127 @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
128 and r3, r0, #0x80000000
130 #if defined(__thumb2__)
132 sbc r0, r0, r0, lsl #1
138 @ Determine how to normalize the result.
145 @ Result needs to be shifted right.
150 @ Make sure we did not bust our exponent.
154 @ Our result is now properly aligned into r0, remaining bits in r1.
155 @ Pack final result together.
156 @ Round with MSB of r1. If halfway between two numbers, round towards
160 adc r0, r0, r2, lsl #23 @ add exponent field plus the rounding carry
166 @ Result must be shifted left and exponent adjusted.
174 @ No rounding necessary since r1 will always be 0 at this point.
180 moveq r0, r0, lsl #12
200 shift1 lsl, r0, r0, ip
204 @ Final result with sign
205 @ If exponent negative, denormalize result.
207 addge r0, r0, r2, lsl #23
210 #if defined(__thumb2__)
215 orrlt r0, r3, r0, lsr r2
219 @ Fixup and adjust bit position for denormalized arguments.
220 @ Note that r2 must not remain equal to 0.
223 eor r1, r1, #0x00800000
225 eoreq r0, r0, #0x00800000
235 COND(mvn,s,ne) ip, r3, asr #24
241 @ Result is x + 0.0 = x or 0.0 + y = y.
249 @ Result is x - x = 0.
254 @ Result is x + x = 2x.
259 orrcs r0, r0, #0x80000000
261 2: adds r2, r2, #(2 << 24)
263 addcc r0, r0, #(1 << 23)
265 and r3, r0, #0x80000000
267 @ Overflow: return INF.
269 orr r0, r3, #0x7f000000 @ sign in r3 combined with 0x7f800000 ...
270 orr r0, r0, #0x00800000 @ ... i.e. all-ones exponent, zero mantissa
273 @ At least one of r0/r1 is INF/NAN.
274 @ if r0 != INF/NAN: return r1 (which is INF/NAN)
275 @ if r1 != INF/NAN: return r0 (which is INF/NAN)
276 @ if r0 or r1 is NAN: return NAN
277 @ if opposite sign: return NAN
278 @ otherwise return r0 (which is INF or -INF)
283 COND(mvn,s,eq) r3, r3, asr #24
287 COND(mov,s,eq) r3, r1, lsl #9
289 orrne r0, r0, #0x00400000 @ quiet NAN
@ 32-bit integer to single-precision conversion entry points:
@ floatunsisf/aeabi_ui2f and floatsisf/aeabi_i2f (per the ARM run-time ABI
@ names, the unsigned and signed variants respectively).  The argument is
@ read from r0.
@ NOTE(review): the embedded line numbering has gaps, so the body of the
@ unsigned entry and the shared normalization code are not visible here.
298 ARM_FUNC_START floatunsisf
299 ARM_FUNC_ALIAS aeabi_ui2f floatunsisf
304 ARM_FUNC_START floatsisf
305 ARM_FUNC_ALIAS aeabi_i2f floatsisf
307 ands r3, r0, #0x80000000 @ r3 = bit 31 of the input; flags set from it
315 @ Add initial exponent to sign
316 orr r3, r3, #((127 + 23) << 23) @ 127 = exponent bias, shifted into bit 23 and up
@ 64-bit integer to single-precision conversion entry points:
@ floatundisf/aeabi_ul2f and floatdisf/aeabi_l2f.  The operand is accessed
@ through the register names ah and al, which are defined outside this
@ excerpt (presumably the high and low words of the 64-bit argument --
@ TODO confirm).  The packed result is produced in r0.
@ NOTE(review): the embedded line numbering has gaps; most branches, the
@ normalization shift computation and the #else/#endif lines matching the
@ conditionals opened here are not visible in this excerpt.
329 ARM_FUNC_START floatundisf
330 ARM_FUNC_ALIAS aeabi_ul2f floatundisf
333 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
344 ARM_FUNC_START floatdisf
345 ARM_FUNC_ALIAS aeabi_l2f floatdisf
348 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
356 ands r3, ah, #0x80000000 @ sign bit in r3
358 #if defined(__thumb2__)
360 sbc ah, ah, ah, lsl #1
366 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
367 @ For hard FPA code we want to return via the tail below so that
368 @ we can return the result in f0 as well as in r0 for backwards
380 @ Add initial exponent to sign
381 orr r3, r3, #((127 + 23 + 32) << 23)
383 subeq r3, r3, #(32 << 23) @ conditionally take the extra 32 back off the exponent
384 2: sub r3, r3, #(1 << 23)
391 movhs ip, ip, lsr #16
404 sublo r2, r2, ip, lsr #1
405 subs r2, r2, ip, lsr #3
414 sub r3, r3, r2, lsl #23 @ lower the exponent field by the shift count in r2
417 shiftop add r3 r3 ah lsl r2 ip
418 shift1 lsl, ip, al, r2
421 shiftop adc r0 r3 al lsr r2 r2
427 shift1 lsl, ip, ah, r2
429 orrs al, al, ip, lsl #1
430 shiftop adc r0 r3 ah lsr r2 r2
432 biceq r0, r0, ip, lsr #31
435 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
449 #endif /* L_addsubsf3 */
@ mulsf3 / aeabi_fmul: single-precision multiply.  Operands are read from
@ r0 and r1 and the packed result is built in r0.  Special operands
@ (zero, denormal, INF, NAN) are diverted to the handlers further down.
@ NOTE(review): the embedded line numbering has gaps, so many instructions
@ and the definitions of the LSYM(Lml_*) labels referenced below are not
@ visible in this excerpt.
453 ARM_FUNC_START mulsf3
454 ARM_FUNC_ALIAS aeabi_fmul mulsf3
456 @ Mask out exponents, trap any zero/denormal/INF/NAN.
458 ands r2, ip, r0, lsr #23
460 COND(and,s,ne) r3, ip, r1, lsr #23
466 @ Add exponents together
469 @ Determine final sign.
472 @ Convert mantissa to unsigned integer.
473 @ If power of two, branch to a separate path.
474 @ Make up for final alignment.
477 COND(mov,s,ne) r1, r1, lsl #9
480 orr r0, r3, r0, lsr #5
481 orr r1, r3, r1, lsr #5
485 @ Put sign bit in r3, which will be restored into r0 later.
486 and r3, ip, #0x80000000
488 @ Well, no way to make it shorter without the umull instruction.
492 bic r0, r0, r4, lsl #16
493 bic r1, r1, r5, lsl #16
498 adds r3, r3, r0, lsl #16
499 adc r1, ip, r0, lsr #16
504 @ The actual multiplication.
507 @ Put final sign in r0.
508 and r0, ip, #0x80000000
512 @ Adjust result upon the MSB position.
516 orrcc r1, r1, r3, lsr #31
519 @ Add sign to result.
522 @ Apply exponent bias, check for under/overflow.
527 @ Round the result, merge final exponent.
529 adc r0, r0, r2, lsl #23
534 @ Multiplication by 0x1p*: let's shortcut a lot of code.
537 and ip, ip, #0x80000000
540 orr r0, ip, r0, lsr #9
541 orr r0, r0, r1, lsr #9
544 COND(rsb,s,gt) r3, r2, #255
545 orrgt r0, r0, r2, lsl #23
548 @ Under/overflow: fix things up for the code below.
549 orr r0, r0, #0x00800000
557 @ Check if denormalized result is possible, otherwise return signed 0.
560 bicle r0, r0, #0x7fffffff @ keep only the sign bit
563 @ Shift value right, round, etc.
566 shift1 lsr, r1, r1, r2
568 shift1 lsl, ip, r0, r2
571 orrs r3, r3, ip, lsl #1
573 biceq r0, r0, ip, lsr #31
576 @ One or both arguments are denormalized.
577 @ Scale them leftwards and preserve sign bit.
580 and ip, r0, #0x80000000
583 tsteq r0, #0x00800000
588 and ip, r1, #0x80000000
591 tsteq r1, #0x00800000
598 @ Isolate the INF and NAN cases away
599 and r3, ip, r1, lsr #23
605 @ Here, one or more arguments are either denormalized or zero.
606 bics ip, r0, #0x80000000
608 COND(bic,s,ne) ip, r1, #0x80000000
611 @ Result is 0, but determine sign anyway.
614 bic r0, r0, #0x7fffffff @ keep only the sign bit
617 1: @ One or both args are INF or NAN.
620 teqne r0, #0x80000000
623 teqne r1, #0x80000000
624 beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
628 bne LSYM(Lml_n) @ NAN * <anything> -> NAN
634 bne LSYM(Lml_n) @ <anything> * NAN -> NAN
636 @ Result is INF, but we need to determine its sign.
640 @ Overflow: return INF (sign already in r0).
642 and r0, r0, #0x80000000
643 orr r0, r0, #0x7f000000 @ sign | 0x7f800000 ...
644 orr r0, r0, #0x00800000 @ ... all-ones exponent, zero mantissa
647 @ Return a quiet NAN.
649 orr r0, r0, #0x7f000000 @ force 0x7fc00000 on: all-ones exponent ...
650 orr r0, r0, #0x00c00000 @ ... plus the top mantissa bit
@ divsf3 / aeabi_fdiv: single-precision divide.  Operands are read from r0
@ (dividend) and r1 (divisor); the quotient is accumulated in r0.
@ The special-case exits branch to LSYM(Lml_n), LSYM(Lml_i) and LSYM(Lml_z),
@ whose definitions are not visible in this block; from the branch comments
@ below they return NAN, INF and zero respectively.
@ NOTE(review): the embedded line numbering has gaps, so many instructions
@ (including the compare steps of the division loop) are not visible here.
656 ARM_FUNC_START divsf3
657 ARM_FUNC_ALIAS aeabi_fdiv divsf3
659 @ Mask out exponents, trap any zero/denormal/INF/NAN.
661 ands r2, ip, r0, lsr #23
663 COND(and,s,ne) r3, ip, r1, lsr #23
669 @ Subtract divisor exponent from dividend's
672 @ Preserve final sign into ip.
675 @ Convert mantissa to unsigned integer.
676 @ Dividend -> r3, divisor -> r1.
681 orr r1, r3, r1, lsr #4
682 orr r3, r3, r0, lsr #4
684 @ Initialize r0 (result) with final sign bit.
685 and r0, ip, #0x80000000
687 @ Ensure result will land to known bit position.
688 @ Apply exponent bias accordingly.
692 adc r2, r2, #(127 - 2)
694 @ The actual division loop.
702 subcs r3, r3, r1, lsr #1 @ on carry set: subtract divisor >> 1 ...
703 orrcs r0, r0, ip, lsr #1 @ ... and set the matching quotient bit from ip
706 subcs r3, r3, r1, lsr #2
707 orrcs r0, r0, ip, lsr #2
710 subcs r3, r3, r1, lsr #3
711 orrcs r0, r0, ip, lsr #3
714 COND(mov,s,ne) ip, ip, lsr #4 @ move the quotient bit mask down four positions
717 @ Check exponent for under/overflow.
721 @ Round the result, merge final exponent.
723 adc r0, r0, r2, lsl #23
728 @ Division by 0x1p*: let's shortcut a lot of code.
730 and ip, ip, #0x80000000
731 orr r0, ip, r0, lsr #9
734 COND(rsb,s,gt) r3, r2, #255
735 orrgt r0, r0, r2, lsl #23
738 orr r0, r0, #0x00800000
743 @ One or both arguments are denormalized.
744 @ Scale them leftwards and preserve sign bit.
747 and ip, r0, #0x80000000
750 tsteq r0, #0x00800000
755 and ip, r1, #0x80000000
758 tsteq r1, #0x00800000
764 @ One or both arguments are either INF, NAN, zero or denormalized.
766 and r3, ip, r1, lsr #23
770 bne LSYM(Lml_n) @ NAN / <anything> -> NAN
772 bne LSYM(Lml_i) @ INF / <anything> -> INF
774 b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
778 beq LSYM(Lml_z) @ <anything> / INF -> 0
780 b LSYM(Lml_n) @ <anything> / NAN -> NAN
781 2: @ If both are nonzero, we need to normalize and resume above.
782 bics ip, r0, #0x80000000
784 COND(bic,s,ne) ip, r1, #0x80000000
786 @ One or both arguments are zero.
787 bics r2, r0, #0x80000000 @ Z set when r0 is +0.0 or -0.0
788 bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
789 bics r3, r1, #0x80000000 @ Z set when r1 is +0.0 or -0.0
790 bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
791 b LSYM(Lml_n) @ 0 / 0 -> NAN
796 #endif /* L_muldivsf3 */
@ Three-way single-precision comparison: gtsf2/gesf2, ltsf2/lesf2 and
@ cmpsf2/nesf2/eqsf2.  Operands are in r0 and r1; the entry points differ
@ in the code returned for unordered operands.
@ NOTE(review): the embedded line numbering has gaps; the entry code of
@ the gt/lt variants, the store of the unordered code and the list of
@ clobbered registers announced below are not visible in this excerpt.
800 @ The return value in r0 is
802 @ 0 if the operands are equal
803 @ 1 if the first operand is greater than the second, or
804 @ the operands are unordered and the operation is
805 @ CMP, LT, LE, NE, or EQ.
806 @ -1 if the first operand is less than the second, or
807 @ the operands are unordered and the operation is GT
810 @ The Z flag will be set iff the operands are equal.
812 @ The following registers are clobbered by this function:
816 ARM_FUNC_ALIAS gesf2 gtsf2
821 ARM_FUNC_ALIAS lesf2 ltsf2
825 ARM_FUNC_START cmpsf2
826 ARM_FUNC_ALIAS nesf2 cmpsf2
827 ARM_FUNC_ALIAS eqsf2 cmpsf2
828 mov ip, #1 @ how should we specify unordered here?
832 @ Trap any INF/NAN first.
837 COND(mvn,s,ne) ip, r3, asr #24
841 @ Note that 0.0 is equal to -0.0.
842 2: orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag
844 teqne r0, r1 @ if not 0 compare sign
846 COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0
850 movhi r0, r1, asr #31 @ r0 = sign of r1 replicated (0 or -1)
852 mvnlo r0, r1, asr #31 @ r0 = complement of that (-1 or 0)
858 3: mvns ip, r2, asr #24 @ Z set iff bits 31..24 of r2 are all ones
862 4: mvns ip, r3, asr #24 @ Z set iff bits 31..24 of r3 are all ones
865 beq 2b @ r1 is not NAN
@ NOTE(review): the load below reads the word just below sp.  The matching
@ store is not visible in this excerpt; memory below sp is not protected
@ from interrupt or signal handlers under the AAPCS stack rules -- confirm
@ that the store/load pair adjusts sp (e.g. pre-decrement / post-increment).
866 5: ldr r0, [sp, #-4] @ return unordered code.
@ Flag-returning comparisons: aeabi_cfrcmple, aeabi_cfcmpeq and
@ aeabi_cfcmple.  r0-r3 and lr are pushed at label 6 and r0-r3 are restored
@ by RETLDM, so the outcome is delivered in the Z and C flags as described
@ in the comments below.
@ NOTE(review): the embedded line numbering has gaps; the body of
@ aeabi_cfrcmple (presumably swapping the operands before joining label 6
@ -- TODO confirm), the comparison call and the flag-setting instructions
@ are not visible in this excerpt.
877 ARM_FUNC_START aeabi_cfrcmple
884 ARM_FUNC_START aeabi_cfcmpeq
885 ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
887 @ The status-returning routines are required to preserve all
888 @ registers except ip, lr, and cpsr.
889 6: do_push {r0, r1, r2, r3, lr}
891 @ Set the Z flag correctly, and the C flag unconditionally.
893 @ Clear the C flag if the return value was -1, indicating
894 @ that the first operand was smaller than the second.
897 RETLDM "r0, r1, r2, r3"
899 FUNC_END aeabi_cfcmple
900 FUNC_END aeabi_cfcmpeq
901 FUNC_END aeabi_cfrcmple
@ aeabi_fcmpeq: returns r0 = 1 when the operands compare equal, else 0.
@ Calls aeabi_cfcmple and converts the resulting Z flag into 0/1.
@ NOTE(review): the embedded line numbering has gaps; the save of the
@ return address and the return sequence are not visible in this excerpt.
903 ARM_FUNC_START aeabi_fcmpeq
906 ARM_CALL aeabi_cfcmple
908 moveq r0, #1 @ Equal to.
909 movne r0, #0 @ Less than, greater than, or unordered.
912 FUNC_END aeabi_fcmpeq
@ aeabi_fcmplt: returns r0 = 1 when the first operand is less than the
@ second, else 0.  Calls aeabi_cfcmple and converts the resulting C flag
@ (clear means "less than") into 0/1.
@ NOTE(review): the embedded line numbering has gaps; the save of the
@ return address and the return sequence are not visible in this excerpt.
914 ARM_FUNC_START aeabi_fcmplt
917 ARM_CALL aeabi_cfcmple
919 movcc r0, #1 @ Less than.
920 movcs r0, #0 @ Equal to, greater than, or unordered.
923 FUNC_END aeabi_fcmplt
@ aeabi_fcmple: returns r0 = 1 when the first operand is less than or
@ equal to the second, else 0.  Calls aeabi_cfcmple and converts the
@ resulting flags (LS condition: C clear or Z set) into 0/1.
@ NOTE(review): the embedded line numbering has gaps; the save of the
@ return address and the return sequence are not visible in this excerpt.
925 ARM_FUNC_START aeabi_fcmple
928 ARM_CALL aeabi_cfcmple
930 movls r0, #1 @ Less than or equal to.
931 movhi r0, #0 @ Greater than or unordered.
934 FUNC_END aeabi_fcmple
@ aeabi_fcmpge: returns r0 = 1 when the first operand is greater than or
@ equal to the second, else 0.  Calls the reversed comparison
@ aeabi_cfrcmple and converts the resulting flags (LS condition) into 0/1.
@ NOTE(review): the embedded line numbering has gaps; the save of the
@ return address and the return sequence are not visible in this excerpt.
936 ARM_FUNC_START aeabi_fcmpge
939 ARM_CALL aeabi_cfrcmple
941 movls r0, #1 @ Operand 2 is less than or equal to operand 1.
942 movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
945 FUNC_END aeabi_fcmpge
@ aeabi_fcmpgt: returns r0 = 1 when the first operand is greater than the
@ second, else 0.  Calls the reversed comparison aeabi_cfrcmple and
@ converts the resulting C flag (clear means operand 2 < operand 1) into 0/1.
@ NOTE(review): the embedded line numbering has gaps; the save of the
@ return address and the return sequence are not visible in this excerpt.
947 ARM_FUNC_START aeabi_fcmpgt
950 ARM_CALL aeabi_cfrcmple
952 movcc r0, #1 @ Operand 2 is less than operand 1.
953 movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
954 @ or they are unordered.
957 FUNC_END aeabi_fcmpgt
959 #endif /* L_cmpsf2 */
@ unordsf2 / aeabi_fcmpun: returns r0 = 1 when the arguments are unordered
@ and r0 = 0 when they are ordered.
@ NOTE(review): the embedded line numbering has gaps; the setup of r2/r3,
@ the branches between the numbered labels and the returns are not
@ visible in this excerpt.
963 ARM_FUNC_START unordsf2
964 ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
972 1: mvns ip, r3, asr #24 @ Z set iff bits 31..24 of r3 are all ones
976 2: mov r0, #0 @ arguments are ordered.
978 3: mov r0, #1 @ arguments are unordered.
981 FUNC_END aeabi_fcmpun
984 #endif /* L_unordsf2 */
@ fixsfsi / aeabi_f2iz: convert the single-precision value in r0 to a
@ signed 32-bit integer returned in r0.
@   - exponent too small: handled at label 1 (not visible in this excerpt)
@   - exponent too large: label 2/3; a non-negative input yields 0x7fffffff,
@     a negative one keeps 0x80000000 from the sign-bit mask at label 3
@   - label 4 returns 0 (the NAN case, per its comment)
@ NOTE(review): the embedded line numbering has gaps; the exponent
@ extraction, mantissa setup, negation and returns are not visible here.
988 ARM_FUNC_START fixsfsi
989 ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
991 @ check exponent range.
994 bcc 1f @ value is too small
996 subs r2, r3, r2, lsr #24
997 bls 2f @ value is too large
1001 orr r3, r3, #0x80000000 @ set bit 31 of the value to be shifted
1002 tst r0, #0x80000000 @ the sign bit
1003 shift1 lsr, r0, r3, r2 @ r0 = r3 >> r2
1011 2: cmp r2, #(127 + 31 - 0xff)
1015 3: ands r0, r0, #0x80000000 @ the sign bit
1017 moveq r0, #0x7fffffff @ the maximum signed positive si
1020 4: mov r0, #0 @ What should we convert NAN to?
1026 #endif /* L_fixsfsi */
@ fixunssfsi / aeabi_f2uiz: convert the single-precision value in r0 to an
@ unsigned 32-bit integer returned in r0.
@   - negative or too-small input: handled at label 1 (not visible here)
@   - exponent too large: label 2/3; label 3 returns 0xffffffff
@   - label 4 returns 0 (the NAN case, per its comment)
@ NOTE(review): the embedded line numbering has gaps; the exponent
@ extraction, mantissa setup, returns and the FUNC_END for fixunssfsi are
@ not visible in this excerpt.
1030 ARM_FUNC_START fixunssfsi
1031 ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
1033 @ check exponent range.
1035 bcs 1f @ value is negative
1036 cmp r2, #(127 << 24)
1037 bcc 1f @ value is too small
1039 subs r2, r3, r2, lsr #24
1040 bmi 2f @ value is too large
1044 orr r3, r3, #0x80000000 @ set bit 31 of the value to be shifted
1045 shift1 lsr, r0, r3, r2 @ r0 = r3 >> r2
1051 2: cmp r2, #(127 + 31 - 0xff)
1055 3: mov r0, #0xffffffff @ maximum unsigned si
1058 4: mov r0, #0 @ What should we convert NAN to?
1061 FUNC_END aeabi_f2uiz
1064 #endif /* L_fixunssfsi */