1 /* ieee754-df.S double-precision floating point support for ARM
3 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Nicolas Pitre (nico@cam.org)
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 In addition to the permissions in the GNU General Public License, the
12 Free Software Foundation gives you unlimited permission to link the
13 compiled version of this file into combinations with other programs,
14 and to distribute those combinations without any restriction coming
15 from the use of this file. (The General Public License restrictions
16 do apply in other respects; for example, they cover modification of
17 the file, and distribution when not linked into a combine
20 This file is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; see the file COPYING. If not, write to
27 the Free Software Foundation, 59 Temple Place - Suite 330,
28 Boston, MA 02111-1307, USA. */
33 * The goal of this code is to be as fast as possible. This is
34 * not meant to be easy to understand for the casual reader.
35 * For slightly simpler code please see the single precision version
38 * Only the default rounding mode is intended for best performance.
39 * Exceptions aren't supported yet, but that can be added quite easily
40 * if necessary without impacting performance.
44 @ For FPA, float words are always big-endian.
45 @ For VFP, float words follow the memory system mode.
46 #if defined(__VFP_FP__) && !defined(__ARMEB__)
63 eor xh, xh, #0x80000000
73 @ flip sign bit of second arg
74 eor yh, yh, #0x80000000
75 #if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
76 b 1f @ Skip Thumb-code prologue
81 1: @ Compare both args, return zero if equal but the sign.
87 @ If first arg is 0 or -0, return second arg.
88 @ If second arg is 0 or -0, return first arg.
89 orrs ip, xl, xh, lsl #1
92 orrnes ip, yl, yh, lsl #1
95 stmfd sp!, {r4, r5, lr}
99 orr ip, ip, #0x00f00000
103 @ If either of them is 0x7ff, result will be INF or NAN
108 @ Compute exponent difference. Make largest exponent in r4,
109 @ corresponding arg in xh-xl, and positive exponent difference in r5.
122 @ If exponent difference is too large, return largest argument
123 @ already in xh-xl. We need up to 54 bit to handle proper rounding
128 @ Convert mantissa to signed integer.
130 bic xh, xh, ip, lsl #1
131 orr xh, xh, #0x00100000
137 bic yh, yh, ip, lsl #1
138 orr yh, yh, #0x00100000
143 @ If exponent == difference, one or both args were denormalized.
144 @ Since this is not common case, rescale them off line.
148 @ Scale down second arg with exponent difference.
149 @ Apply shift one bit left to first arg and the rest to second arg
150 @ to simplify things later, but only if exponent does not become 0.
157 adc xh, ip, xh, lsl #1
158 sub r4, r4, #(1 << 20)
162 @ Shift yh-yl right per r5, keep leftover bits into ip.
167 orr yl, yl, yh, lsl lr
173 adc ip, ip, yh, lsl lr
177 @ the actual addition
181 @ We now have a result in xh-xl-ip.
182 @ Keep absolute value in xh-xl-ip, sign in r5.
183 ands r5, xh, #0x80000000
189 @ Determine how to normalize the result.
198 @ Result needs to be shifted right.
203 add r4, r4, #(1 << 20)
209 add r4, r4, #(1 << 20)
211 @ Our result is now properly aligned into xh-xl, remaining bits in ip.
212 @ Round with MSB of ip. If halfway between two numbers, round towards
215 adds xl, xl, ip, lsr #31
220 @ One extreme rounding case may add a new MSB. Adjust exponent.
221 @ That MSB will be cleared when exponent is merged below.
223 addne r4, r4, #(1 << 20)
225 @ Make sure we did not bust our exponent.
226 adds ip, r4, #(1 << 20)
229 @ Pack final result together.
231 bic xh, xh, #0x00300000
237 @ Result must be shifted left and exponent adjusted.
238 @ No rounding necessary since ip will always be 0.
248 moveq r2, r2, lsl #16
273 @ determine how to shift the value.
279 @ shift value left 21 to 31 bits, or actually right 11 to 1 bits
280 @ since a register switch happened above.
287 @ actually shift value left 1 to 20 bits, which might also represent
288 @ 32 to 52 bits if counting the register switch that happened earlier.
292 orrle xh, xh, xl, lsr ip
295 @ adjust exponent accordingly.
296 3: subs r4, r4, r3, lsl #20
299 @ Exponent too small, denormalize result.
300 @ Find out proper shift value.
307 @ shift result right of 1 to 20 bits, sign is in r5.
311 orr xl, xl, xh, lsl r2
312 orr xh, r5, xh, lsr r4
315 @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
316 @ a register switch from xh to xl.
320 orr xl, xl, xh, lsl r4
324 @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
326 2: mov xl, xh, lsr r4
330 @ Adjust exponents for denormalized arguments.
333 eoreq xh, xh, #0x00100000
334 addeq r4, r4, #(1 << 20)
335 eor yh, yh, #0x00100000
336 subne r5, r5, #(1 << 20)
339 @ Result is x - x = 0, unless x = INF or NAN.
341 sub ip, ip, #0x00100000 @ ip becomes 0x7ff00000
344 orreq xh, ip, #0x00080000
349 @ Overflow: return INF.
351 orr xh, r5, #0x7f000000
352 orr xh, xh, #0x00f00000
356 @ At least one of x or y is INF/NAN.
357 @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
358 @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
359 @ if either is NAN: return NAN
360 @ if opposite sign: return NAN
361 @ return xh-xl (which is INF or -INF)
369 orrs r4, xl, xh, lsl #12
370 orreqs r4, yl, yh, lsl #12
372 orrne xh, r5, #0x00080000
379 ARM_FUNC_START floatunsidf
383 stmfd sp!, {r4, r5, lr}
384 mov r4, #(0x400 << 20) @ initial exponent
385 add r4, r4, #((52-1) << 20)
386 mov r5, #0 @ sign bit is 0
393 ARM_FUNC_START floatsidf
397 stmfd sp!, {r4, r5, lr}
398 mov r4, #(0x400 << 20) @ initial exponent
399 add r4, r4, #((52-1) << 20)
400 ands r5, r0, #0x80000000 @ sign bit in r5
401 rsbmi r0, r0, #0 @ absolute value
408 ARM_FUNC_START extendsfdf2
410 beq 1f @ value is 0.0 or -0.0
411 mov xh, r2, asr #3 @ stretch exponent
412 mov xh, xh, rrx @ retrieve sign bit
413 mov xl, r2, lsl #28 @ retrieve remaining bits
414 ands r2, r2, #0xff000000 @ isolate exponent
415 beq 2f @ exponent was 0 but not mantissa
416 teq r2, #0xff000000 @ check if INF or NAN
417 eorne xh, xh, #0x38000000 @ fixup exponent otherwise.
424 2: @ value was denormalized. We can normalize it now.
425 stmfd sp!, {r4, r5, lr}
426 mov r4, #(0x380 << 20) @ setup corresponding exponent
427 add r4, r4, #(1 << 20)
428 and r5, xh, #0x80000000 @ move sign bit in r5
429 bic xh, xh, #0x80000000
434 #endif /* L_addsubdf3 */
438 ARM_FUNC_START muldf3
440 stmfd sp!, {r4, r5, r6, lr}
442 @ Mask out exponents.
444 orr ip, ip, #0x00f00000
453 @ Trap any multiplication by 0.
454 orrs r6, xl, xh, lsl #1
455 orrnes r6, yl, yh, lsl #1
458 @ Shift exponents right one bit to make room for overflow bit.
459 @ If either of them is 0, scale denormalized arguments off line.
460 @ Then add both exponents together.
465 add r4, r4, r5, asr #1
467 @ Preserve final sign in r4 along with exponent for now.
469 orrmi r4, r4, #0x8000
471 @ Convert mantissa to unsigned integer.
472 bic xh, xh, ip, lsl #1
473 bic yh, yh, ip, lsl #1
474 orr xh, xh, #0x00100000
475 orr yh, yh, #0x00100000
479 @ Well, no way to make it shorter without the umull instruction.
480 @ We must perform that 53 x 53 bit multiplication by hand.
481 stmfd sp!, {r7, r8, r9, sl, fp}
486 bic xl, xl, r7, lsl #16
487 bic yl, yl, r8, lsl #16
488 bic xh, xh, r9, lsl #16
489 bic yh, yh, sl, lsl #16
493 adds ip, ip, fp, lsl #16
494 adc lr, lr, fp, lsr #16
496 adds ip, ip, fp, lsl #16
497 adc lr, lr, fp, lsr #16
500 adds lr, lr, fp, lsl #16
501 adc r5, r5, fp, lsr #16
503 adds lr, lr, fp, lsl #16
504 adc r5, r5, fp, lsr #16
506 adds lr, lr, fp, lsl #16
507 adc r5, r5, fp, lsr #16
509 adds lr, lr, fp, lsl #16
510 adc r5, r5, fp, lsr #16
513 adds r5, r5, fp, lsl #16
514 adc r6, r6, fp, lsr #16
516 adds r5, r5, fp, lsl #16
517 adc r6, r6, fp, lsr #16
533 ldmfd sp!, {r7, r8, r9, sl, fp}
537 @ Here is the actual multiplication: 53 bits * 53 bits -> 106 bits.
547 @ The LSBs in ip are only significant for the final rounding.
548 @ Fold them into one bit of lr.
552 @ Put final sign in xh.
556 @ Adjust result if one extra MSB appeared (one of four times).
559 add r4, r4, #(1 << 19)
565 @ Scale back to 53 bits.
566 @ xh contains sign bit already.
567 orr xh, xh, r6, lsl #12
568 orr xh, xh, r5, lsr #20
570 orr xl, xl, lr, lsr #20
572 @ Apply exponent bias, check range for underflow.
573 sub r4, r4, #0x00f80000
574 subs r4, r4, #0x1f000000
585 @ Rounding may have produced an extra MSB here.
586 @ The extra bit is cleared before merging the exponent below.
588 addne r4, r4, #(1 << 19)
590 @ Check exponent for overflow.
591 adds ip, r4, #(1 << 19)
595 @ Add final exponent.
596 bic xh, xh, #0x00300000
597 orr xh, xh, r4, lsl #1
600 @ Result is 0, but determine sign anyway.
604 bic xh, xh, #0x7fffffff
608 @ Check if denormalized result is possible, otherwise return signed 0.
612 bicle xh, xh, #0x7fffffff
613 RETLDM "r4, r5, r6" le
615 @ Find out proper shift value.
623 @ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
628 orr xl, xl, xh, lsl r5
632 adds xl, xl, r3, lsr #31
635 teqeq r3, #0x80000000
639 @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
640 @ a register switch from xh to xl. Then round.
645 orr xl, xl, xh, lsl r4
646 bic xh, xh, #0x7fffffff
647 adds xl, xl, r3, lsr #31
650 teqeq r3, #0x80000000
654 @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
655 @ from xh to xl. Leftover bits are in r3-r6-lr for rounding.
659 orr r3, r3, xh, lsl r5
661 bic xh, xh, #0x7fffffff
662 adds xl, xl, r3, lsr #31
665 teqeq r3, #0x80000000
669 @ One or both arguments are denormalized.
670 @ Scale them leftwards and preserve sign bit.
675 and r6, xh, #0x80000000
676 1: movs xl, xl, lsl #1
677 adc xh, lr, xh, lsl #1
679 subeq r4, r4, #(1 << 19)
684 2: and r6, yh, #0x80000000
685 3: movs yl, yl, lsl #1
686 adc yh, lr, yh, lsl #1
688 subeq r5, r5, #(1 << 20)
693 @ One or both args are INF or NAN.
695 orrs r6, xl, xh, lsl #1
696 orrnes r6, yl, yh, lsl #1
697 beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
700 orrs r6, xl, xh, lsl #12
701 bne LSYM(Lml_n) @ NAN * <anything> -> NAN
704 orrs r6, yl, yh, lsl #12
705 bne LSYM(Lml_n) @ <anything> * NAN -> NAN
707 @ Result is INF, but we need to determine its sign.
711 @ Overflow: return INF (sign already in xh).
713 and xh, xh, #0x80000000
714 orr xh, xh, #0x7f000000
715 orr xh, xh, #0x00f00000
722 orr xh, xh, #0x00f80000
727 ARM_FUNC_START divdf3
729 stmfd sp!, {r4, r5, r6, lr}
731 @ Mask out exponents.
733 orr ip, ip, #0x00f00000
737 @ Trap any INF/NAN or zeroes.
740 orrnes r6, xl, xh, lsl #1
741 orrnes r6, yl, yh, lsl #1
744 @ Shift exponents right one bit to make room for overflow bit.
745 @ If either of them is 0, scale denormalized arguments off line.
746 @ Then substract divisor exponent from dividend''s.
751 sub r4, r4, r5, asr #1
753 @ Preserve final sign into lr.
756 @ Convert mantissa to unsigned integer.
757 @ Dividend -> r5-r6, divisor -> yh-yl.
760 orr yh, r5, yh, lsr #4
761 orr yh, yh, yl, lsr #24
766 orr r5, r5, xh, lsr #4
767 orr r5, r5, xl, lsr #24
770 @ Initialize xh with final sign bit.
771 and xh, lr, #0x80000000
773 @ Ensure result will land to known bit position.
777 sub r4, r4, #(1 << 19)
781 @ Apply exponent bias, check range for over/underflow.
782 add r4, r4, #0x1f000000
783 add r4, r4, #0x00f80000
789 @ Perform first substraction to align result to a nibble.
797 @ The actual division loop.
809 orrcs xl, xl, ip, lsr #1
816 orrcs xl, xl, ip, lsr #2
823 orrcs xl, xl, ip, lsr #3
828 orr r5, r5, r6, lsr #28
831 orr yh, yh, yl, lsr #29
836 @ We are done with a word of the result.
837 @ Loop again for the low word if this pass was for the high word.
845 @ Be sure result starts in the high word.
850 @ Check if denormalized result is needed.
854 @ Apply proper rounding.
862 @ Add exponent to result.
863 bic xh, xh, #0x00100000
864 orr xh, xh, r4, lsl #1
867 @ Division by 0x1p*: shortcut a lot of code.
869 and lr, lr, #0x80000000
870 orr xh, lr, xh, lsr #12
871 add r4, r4, #0x1f000000
872 add r4, r4, #0x00f80000
876 orrgt xh, xh, r4, lsl #1
877 RETLDM "r4, r5, r6" gt
881 orr xh, xh, #0x00100000
885 @ Result must be denormalized: put remainder in lr for
886 @ rounding considerations.
891 @ One or both arguments are denormalized.
892 @ Scale them leftwards and preserve sign bit.
897 and r6, xh, #0x80000000
898 1: movs xl, xl, lsl #1
899 adc xh, lr, xh, lsl #1
901 subeq r4, r4, #(1 << 19)
906 2: and r6, yh, #0x80000000
907 3: movs yl, yl, lsl #1
908 adc yh, lr, yh, lsl #1
910 subeq r5, r5, #(1 << 20)
915 @ One or both arguments is either INF, NAN or zero.
919 beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
922 orrs r4, xl, xh, lsl #12
923 bne LSYM(Lml_n) @ NAN / <anything> -> NAN
924 b LSYM(Lml_i) @ INF / <anything> -> INF
927 orrs r5, yl, yh, lsl #12
928 bne LSYM(Lml_n) @ <anything> / NAN -> NAN
929 b LSYM(Lml_z) @ <anything> / INF -> 0
930 2: @ One or both arguments are 0.
931 orrs r4, xl, xh, lsl #1
932 bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
933 orrs r5, yl, yh, lsl #1
934 bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
935 b LSYM(Lml_n) @ 0 / 0 -> NAN
939 #endif /* L_muldivdf3 */
944 ARM_FUNC_ALIAS gedf2 gtdf2
949 ARM_FUNC_ALIAS ledf2 ltdf2
953 ARM_FUNC_START cmpdf2
954 ARM_FUNC_ALIAS nedf2 cmpdf2
955 ARM_FUNC_ALIAS eqdf2 cmpdf2
956 mov ip, #1 @ how should we specify unordered here?
958 1: stmfd sp!, {r4, r5, lr}
960 @ Trap any INF/NAN first.
962 orr lr, lr, #0x00f00000
970 @ Note that 0.0 is equal to -0.0.
971 2: orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0
972 orreqs ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0
973 teqne xh, yh @ or xh == yh
974 teqeq xl, yl @ and xl == yl
975 moveq r0, #0 @ then equal.
978 @ Check for sign difference.
980 movmi r0, xh, asr #31
987 @ Compare mantissa if exponents are equal.
988 moveq xh, xh, lsl #12
989 cmpeq xh, yh, lsl #12
991 movcs r0, yh, asr #31
992 mvncc r0, yh, asr #31
999 orrs xl, xl, xh, lsl #12
1003 orrs yl, yl, yh, lsl #12
1004 beq 2b @ y is not NAN
1005 5: mov r0, ip @ return unordered code from ip
1016 #endif /* L_cmpdf2 */
1020 ARM_FUNC_START unorddf2
1023 orr ip, ip, #0x00f00000
1027 orrs xl, xl, xh, lsl #12
1032 orrs yl, yl, yh, lsl #12
1034 2: mov r0, #0 @ arguments are ordered.
1037 3: mov r0, #1 @ arguments are unordered.
1042 #endif /* L_unorddf2 */
1046 ARM_FUNC_START fixdfsi
1047 orrs ip, xl, xh, lsl #1
1048 beq 1f @ value is 0.
1050 mov r3, r3, rrx @ preserve C flag (the actual sign)
1052 @ check exponent range.
1054 orr ip, ip, #0x00f00000
1057 beq 2f @ value is INF or NAN
1058 bic ip, ip, #0x40000000
1060 bcc 1f @ value is too small
1061 add ip, ip, #(31 << 20)
1063 bcs 3f @ value is too large
1067 orr ip, ip, #0x80000000
1068 orr ip, ip, xl, lsr #21
1070 tst r3, #0x80000000 @ the sign bit
1078 2: orrs xl, xl, xh, lsl #12
1080 3: ands r0, r3, #0x80000000 @ the sign bit
1081 moveq r0, #0x7fffffff @ maximum signed positive si
1084 4: mov r0, #0 @ How should we convert NAN?
1089 #endif /* L_fixdfsi */
1093 ARM_FUNC_START fixunsdfsi
1094 orrs ip, xl, xh, lsl #1
1095 movcss r0, #0 @ value is negative
1096 RETc(eq) @ or 0 (xl, xh overlap r0)
1098 @ check exponent range.
1100 orr ip, ip, #0x00f00000
1103 beq 2f @ value is INF or NAN
1104 bic ip, ip, #0x40000000
1106 bcc 1f @ value is too small
1107 add ip, ip, #(31 << 20)
1109 bhi 3f @ value is too large
1113 orr ip, ip, #0x80000000
1114 orr ip, ip, xl, lsr #21
1122 2: orrs xl, xl, xh, lsl #12
1123 bne 4f @ value is NAN.
1124 3: mov r0, #0xffffffff @ maximum unsigned si
1127 4: mov r0, #0 @ How should we convert NAN?
1132 #endif /* L_fixunsdfsi */
1136 ARM_FUNC_START truncdfsf2
1137 orrs r2, xl, xh, lsl #1
1139 RETc(eq) @ value is 0.0 or -0.0
1141 @ check exponent range.
1143 orr ip, ip, #0x00f00000
1146 beq 2f @ value is INF or NAN
1148 cmp r2, #(0x380 << 20)
1149 bls 4f @ value is too small
1151 @ shift and round mantissa
1152 1: movs r3, xl, lsr #29
1153 adc r3, r3, xh, lsl #3
1155 @ if halfway between two numbers, round towards LSB = 0.
1160 @ rounding might have created an extra MSB. If so adjust exponent.
1162 addne r2, r2, #(1 << 20)
1163 bicne r3, r3, #0x00800000
1165 @ check exponent for overflow
1166 mov ip, #(0x400 << 20)
1167 orr ip, ip, #(0x07f << 20)
1171 @ adjust exponent, merge with sign bit and mantissa.
1175 eor r0, r0, #0x40000000
1179 orrs xl, xl, xh, lsl #12
1180 movne r0, #0x7f000000
1181 orrne r0, r0, #0x00c00000
1182 RETc(ne) @ return NAN
1184 3: @ return INF with sign
1185 and r0, xh, #0x80000000
1186 orr r0, r0, #0x7f000000
1187 orr r0, r0, #0x00800000
1190 4: @ check if denormalized value is possible
1191 subs r2, r2, #((0x380 - 24) << 20)
1192 andle r0, xh, #0x80000000 @ too small, return signed 0.
1195 @ denormalize value so we can resume with the code above afterwards.
1196 orr xh, xh, #0x00100000
1205 orr xl, xl, xh, lsl ip
1209 5: teq r3, #0 @ fold r3 bits into the LSB
1210 orrne xl, xl, #1 @ for rounding considerations.
1211 mov r2, #(0x380 << 20) @ equivalent to the 0 float exponent
1214 6: rsb r2, r2, #(12 + 20)
1218 orr xl, xl, xh, lsl r2
1219 and xh, xh, #0x80000000
1224 #endif /* L_truncdfsf2 */