1 /* ieee754-sf.S single-precision floating point support for ARM
3 Copyright (C) 2003 Free Software Foundation, Inc.
4 Contributed by Nicolas Pitre (nico@cam.org)
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 In addition to the permissions in the GNU General Public License, the
12 Free Software Foundation gives you unlimited permission to link the
13 compiled version of this file into combinations with other programs,
14 and to distribute those combinations without any restriction coming
15 from the use of this file. (The General Public License restrictions
16 do apply in other respects; for example, they cover modification of
17 the file, and distribution when not linked into a combined executable.)
20 This file is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; see the file COPYING. If not, write to
27 the Free Software Foundation, 59 Temple Place - Suite 330,
28 Boston, MA 02111-1307, USA. */
33 * The goal of this code is to be as fast as possible. This is
34 * not meant to be easy to understand for the casual reader.
36 * Only the default rounding mode is intended for best performances.
37 * Exceptions aren't supported yet, but that can be added quite easily
38 * if necessary without impacting performances.
41 @ This selects the minimum architecture level required.
@ NOTE(review): the decimal prefix on every line is the ORIGINAL file's
@ own line number -- this chunk is a sparse extraction, so the matching
@ #endif lines and the macro bodies are not visible here.
43 #define __ARM_ARCH__ 3
@ Any core with long-multiply (umull) support is treated as level 4.
45 #if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
46 || defined(__ARM_ARCH_4T__)
48 /* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
49 long multiply instructions. That includes v3M. */
50 #define __ARM_ARCH__ 4
53 #if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
54 || defined(__ARM_ARCH_5TE__)
56 #define __ARM_ARCH__ 5
@ Architectures that have bx: use it for conditional returns so that
@ ARM/Thumb interworking is preserved.
59 #if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
@ RETc(cond) expands to a conditional "bx<cond> lr" return.
63 #define RETc(x) bx##x lr
64 #if (__ARM_ARCH__ == 4) && (defined(__thumb__) || defined(__THUMB_INTERWORK__))
65 #define __FP_INTERWORKING__
69 #if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
@ Two variants of the function-entry macro are declared: one for Thumb
@ without interworking, one for the other case (bodies and .endm lines
@ are not visible in this extract).
70 .macro ARM_FUNC_START name
77 .macro ARM_FUNC_START name
@ -----------------------------------------------------------------------
@ Fragment of negsf2 / subsf3 / addsf3: IEEE 754 single-precision
@ negate, subtract and add.  Operands arrive in r0/r1, result in r0
@ (ARM soft-float convention).
@ NOTE(review): the decimal prefix on each line is the ORIGINAL file's
@ line number -- many instructions in between are missing from this
@ extract, so the comments below describe only the visible lines.
@ -----------------------------------------------------------------------
@ Negate: just toggle the IEEE sign bit.
83 eor r0, r0, #0x80000000 @ flip sign bit
@ Subtract: negate the second operand, then fall into the add path.
87 eor r1, r1, #0x80000000 @ flip sign bit of second arg
88 #if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
89 b 1f @ Skip Thumb-code prologue
94 1: @ Compare both args, return zero if equal except for the sign.
99 @ If first arg is 0 or -0, return second arg.
100 @ If second arg is 0 or -0, return first arg.
@ Flag-setting bics clears the sign bit, so Z means "arg is +/-0".
101 bics r2, r0, #0x80000000
103 bicnes r2, r1, #0x80000000
106 @ Mask out exponents.
@ NOTE(review): ip presumably holds 0xff000000 here (the instruction
@ setting it is not visible); ip lsr #1 = 0x7f800000, the exponent mask.
108 and r2, r0, ip, lsr #1
109 and r3, r1, ip, lsr #1
111 @ If either of them is 255, result will be INF or NAN
116 @ Compute exponent difference. Make largest exponent in r2,
117 @ corresponding arg in r0, and positive exponent difference in r3.
125 @ If exponent difference is too large, return largest argument
126 @ already in r0. We need up to 25 bits to handle proper rounding
131 @ Convert mantissa to signed integer.
@ Insert the implicit leading 1, then drop the sign/exponent bits.
133 orr r0, r0, #0x00800000
134 bic r0, r0, #0xff000000
137 orr r1, r1, #0x00800000
138 bic r1, r1, #0xff000000
141 @ If exponent == difference, one or both args were denormalized.
142 @ Since this is not a common case, rescale them off line.
147 @ Scale down second arg with exponent difference.
148 @ Apply shift one bit left to first arg and the rest to second arg
149 @ to simplify things later, but only if exponent does not become 0.
@ The exponent is kept in r2 scaled by 1 << 23, i.e. in its IEEE position.
153 subne r2, r2, #(1 << 23)
156 @ Shift second arg into ip, keep leftover bits into r1.
161 add r0, r0, ip @ the actual addition
163 @ We now have a 64 bit result in r0-r1.
164 @ Keep absolute value in r0-r1, sign in r3.
165 ands r3, r0, #0x80000000
170 @ Determine how to normalize the result.
179 @ Result needs to be shifted right.
182 add r2, r2, #(1 << 23)
186 add r2, r2, #(1 << 23)
188 @ Our result is now properly aligned into r0, remaining bits in r1.
189 @ Round with MSB of r1. If halfway between two numbers, round towards
@ ... an even LSB (default round-to-nearest; the comment's tail is not
@ visible in this extract).
192 add r0, r0, r1, lsr #31
196 @ Rounding may have added a new MSB. Adjust exponent.
197 @ That MSB will be cleared when exponent is merged below.
199 addne r2, r2, #(1 << 23)
201 @ Make sure we did not bust our exponent.
205 @ Pack final result together.
@ Clear bits 23-24 (implicit 1 plus possible carry) before the exponent
@ field is merged in.
207 bic r0, r0, #0x01800000
212 @ Result must be shifted left.
213 @ No rounding necessary since r1 will always be 0.
@ Binary-search normalization: shift left in steps of 12/8/4/2/1 bits,
@ decrementing the (1 << 23)-scaled exponent to match each step.
219 moveq r0, r0, lsl #12
220 subeq r2, r2, #(12 << 23)
223 subeq r2, r2, #(8 << 23)
226 subeq r2, r2, #(4 << 23)
229 subeq r2, r2, #(2 << 23)
232 subeq r2, r2, #(1 << 23)
241 subs r2, r2, ip, lsl #23
246 @ Exponent too small, denormalize result.
249 orr r0, r3, r0, lsr r2
252 @ Fixup and adjust bit position for denormalized arguments.
253 @ Note that r2 must not remain equal to 0.
256 eoreq r0, r0, #0x00800000
257 addeq r2, r2, #(1 << 23)
258 eor r1, r1, #0x00800000
259 subne r3, r3, #(1 << 23)
262 @ Result is x - x = 0, unless x is INF or NAN.
265 and r2, r0, ip, lsr #1
271 @ Overflow: return INF.
@ Build 0x7f800000 (INF) from two immediates, merged with sign from r3.
273 orr r0, r3, #0x7f000000
274 orr r0, r0, #0x00800000
277 @ At least one of r0/r1 is INF/NAN.
278 @ if r0 != INF/NAN: return r1 (which is INF/NAN)
279 @ if r1 != INF/NAN: return r0 (which is INF/NAN)
280 @ if r0 or r1 is NAN: return NAN
281 @ if opposite sign: return NAN
282 @ return r0 (which is INF or -INF)
289 moveqs r2, r1, lsl #9
@ Setting bit 22 on an INF pattern produces a (quiet) NAN.
291 orrne r0, r3, #0x00400000 @ NAN
295 ARM_FUNC_START floatunsisf
@ -----------------------------------------------------------------------
@ floatunsisf / floatsisf fragment: convert the (unsigned / signed)
@ 32-bit integer in r0 to an IEEE 754 single-precision float in r0.
@ NOTE(review): the decimal prefix on each line is the ORIGINAL file's
@ line number; the instructions between the visible ones are missing
@ from this extract.
@ -----------------------------------------------------------------------
299 ARM_FUNC_START floatsisf
@ Split off the sign into r3; the conversion then works on |value|.
300 ands r3, r0, #0x80000000
@ Biased exponent for a value aligned at bit 23: (127 + 23), placed in
@ its IEEE bit position (<< 23).
307 mov r2, #((127 + 23) << 23)
311 @ We need to scale the value a little before branching to code above.
@ Spill the low-order bits into r1 and bump the exponent to match
@ (4 bits here, 2 bits below) so the shared add path can finish up.
313 movne r1, r0, lsl #28
315 addne r2, r2, #(4 << 23)
319 orr r1, r1, r0, lsl #30
321 add r2, r2, #(2 << 23)
325 ARM_FUNC_START mulsf3
@ -----------------------------------------------------------------------
@ mulsf3 fragment: IEEE 754 single-precision multiply; operands in
@ r0/r1, product returned in r0 (soft-float convention).
@ NOTE(review): the decimal prefix on each line is the ORIGINAL file's
@ line number; several instructions between the visible ones are
@ missing from this extract.
@ -----------------------------------------------------------------------
327 @ Mask out exponents.
@ NOTE(review): ip presumably holds 0xff000000 here (set by code not
@ visible); ip lsr #1 = 0x7f800000 extracts the exponent field.
329 and r2, r0, ip, lsr #1
330 and r3, r1, ip, lsr #1
337 @ Trap any multiplication by 0.
338 bics ip, r0, #0x80000000
339 bicnes ip, r1, #0x80000000
342 @ Shift exponents right one bit to make room for overflow bit.
343 @ If either of them is 0, scale denormalized arguments off line.
344 @ Then add both exponents together.
@ From here the exponent is scaled by 1 << 22 (half the IEEE position).
349 add r2, r2, r3, asr #1
351 @ Preserve final sign in r2 along with exponent for now.
353 orrmi r2, r2, #0x8000
355 @ Convert mantissa to unsigned integer.
356 bic r0, r0, #0xff000000
357 bic r1, r1, #0xff000000
358 orr r0, r0, #0x00800000
359 orr r1, r1, #0x00800000
363 @ Well, no way to make it shorter without the umull instruction.
364 @ We must perform that 24 x 24 -> 48 bit multiplication by hand.
@ Pre-v3M path: split each mantissa into 16-bit halves and accumulate
@ partial products (only part of the sequence is visible here).
368 bic r0, r0, #0x00ff0000
369 bic r1, r1, #0x00ff0000
374 adds r3, r3, r0, lsl #16
375 adc ip, ip, r0, lsr #16
@ v3M+ path: a single long multiply, 48-bit product in ip:r3.
380 umull r3, ip, r0, r1 @ The actual multiplication.
384 @ Put final sign in r0.
388 @ Adjust result if one extra MSB appeared.
389 @ The LSB may be lost but this never changes the result in this case.
391 addne r2, r2, #(1 << 22)
392 movnes ip, ip, lsr #1
395 @ Apply exponent bias, check range for underflow.
396 subs r2, r2, #(127 << 22)
399 @ Scale back to 24 bits with rounding.
400 @ r0 contains sign bit already.
@ The carry out of the orrs feeds the adc below: round to nearest.
401 orrs r0, r0, r3, lsr #23
402 adc r0, r0, ip, lsl #9
404 @ If halfway between two numbers, rounding should be towards LSB = 0.
409 @ Note: rounding may have produced an extra MSB here.
410 @ The extra bit is cleared before merging the exponent below.
412 addne r2, r2, #(1 << 22)
414 @ Check for exponent overflow
418 @ Add final exponent.
419 bic r0, r0, #0x01800000
420 orr r0, r0, r2, lsl #1
423 @ Result is 0, but determine sign anyway.
@ Sign of the zero result is the XOR of the two argument signs.
424 LSYM(Lml_z): eor r0, r0, r1
425 bic r0, r0, #0x7fffffff
428 @ Check if denormalized result is possible, otherwise return signed 0.
433 @ Find out proper shift value.
438 @ Shift value left, round, etc.
440 orrs r0, r0, r3, lsr r1
442 adc r0, r0, ip, lsl r1
448 @ Shift value right, round, etc.
449 @ Note: r1 must not be 0 otherwise carry does not get set.
451 orrs r0, r0, ip, lsr r1
@ Tie case: test whether the discarded bits were exactly half an ULP.
456 teqeq ip, #0x80000000
460 @ One or both arguments are denormalized.
461 @ Scale them leftwards and preserve sign bit.
@ Shift the denormal mantissa up until its implicit-1 position (bit 23)
@ is set, decrementing the scaled exponent for each bit shifted.
464 and ip, r0, #0x80000000
465 1: moveq r0, r0, lsl #1
466 tsteq r0, #0x00800000
467 subeq r2, r2, #(1 << 22)
471 and ip, r1, #0x80000000
472 2: moveq r1, r1, lsl #1
473 tsteq r1, #0x00800000
474 subeq r3, r3, #(1 << 23)
479 @ One or both args are INF or NAN.
483 teqne r0, #0x80000000
484 teqne r1, #0x80000000
485 beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
489 bne LSYM(Lml_n) @ NAN * <anything> -> NAN
490 1: teq r3, ip, lsr #1
493 bne LSYM(Lml_n) @ <anything> * NAN -> NAN
495 @ Result is INF, but we need to determine its sign.
499 @ Overflow: return INF (sign already in r0).
501 and r0, r0, #0x80000000
502 orr r0, r0, #0x7f000000
503 orr r0, r0, #0x00800000
@ Return a NAN: INF pattern with the top mantissa bits also set.
509 orr r0, r0, #0x00c00000
513 ARM_FUNC_START divsf3
@ -----------------------------------------------------------------------
@ divsf3 fragment: IEEE 754 single-precision divide, r0 / r1 -> r0
@ (soft-float convention).
@ NOTE(review): the decimal prefix on each line is the ORIGINAL file's
@ line number; several instructions between the visible ones are
@ missing from this extract.
@ -----------------------------------------------------------------------
515 @ Mask out exponents.
@ NOTE(review): ip presumably holds 0xff000000 here (set by code not
@ visible); ip lsr #1 = 0x7f800000 extracts the exponent field.
517 and r2, r0, ip, lsr #1
518 and r3, r1, ip, lsr #1
520 @ Trap any INF/NAN or zeroes.
523 bicnes ip, r0, #0x80000000
524 bicnes ip, r1, #0x80000000
527 @ Shift exponents right one bit to make room for overflow bit.
528 @ If either of them is 0, scale denormalized arguments off line.
529 @ Then subtract the divisor exponent from the dividend's.
@ The exponent difference is kept scaled by 1 << 22 from here on.
534 sub r2, r2, r3, asr #1
536 @ Preserve final sign into ip.
539 @ Convert mantissa to unsigned integer.
540 @ Dividend -> r3, divisor -> r1.
545 orr r1, r3, r1, lsr #4
546 orr r3, r3, r0, lsr #4
548 @ Initialize r0 (result) with final sign bit.
549 and r0, ip, #0x80000000
551 @ Ensure result will land at a known bit position.
553 subcc r2, r2, #(1 << 22)
556 @ Apply exponent bias, check range for over/underflow.
557 add r2, r2, #(127 << 22)
563 @ The actual division loop.
@ Shift-and-subtract division producing several quotient bits per
@ iteration; ip carries the current quotient bit mask.
569 subcs r3, r3, r1, lsr #1
570 orrcs r0, r0, ip, lsr #1
572 subcs r3, r3, r1, lsr #2
573 orrcs r0, r0, ip, lsr #2
575 subcs r3, r3, r1, lsr #3
576 orrcs r0, r0, ip, lsr #3
578 movnes ip, ip, lsr #4
581 @ Check if denormalized result is needed.
585 @ Apply proper rounding.
590 @ Add exponent to result.
591 bic r0, r0, #0x00800000
592 orr r0, r0, r2, lsl #1
595 @ Division by 0x1p*: let's shortcut a lot of code.
@ Power-of-two divisor: the mantissa is unchanged, only the exponent
@ needs adjusting.
597 and ip, ip, #0x80000000
598 orr r0, ip, r0, lsr #9
599 add r2, r2, #(127 << 22)
603 orrgt r0, r0, r2, lsl #1
608 orr r0, r0, #0x00800000
611 @ Result must be denormalized: prepare parameters to use code above.
612 @ r3 already contains remainder for rounding considerations.
614 bic ip, r0, #0x80000000
615 and r0, r0, #0x80000000
620 @ One or both arguments are denormalized.
621 @ Scale them leftwards and preserve sign bit.
@ Shift the denormal mantissa up until bit 23 is set, decrementing the
@ scaled exponent for each bit shifted.
624 and ip, r0, #0x80000000
625 1: moveq r0, r0, lsl #1
626 tsteq r0, #0x00800000
627 subeq r2, r2, #(1 << 22)
631 and ip, r1, #0x80000000
632 2: moveq r1, r1, lsl #1
633 tsteq r1, #0x00800000
634 subeq r3, r3, #(1 << 23)
639 @ One or both arguments is either INF, NAN or zero.
@ Special-value dispatch; the LSYM(Lml_*) targets are shared with mulsf3.
644 beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
648 bne LSYM(Lml_n) @ NAN / <anything> -> NAN
649 b LSYM(Lml_i) @ INF / <anything> -> INF
650 1: teq r3, ip, lsr #1
653 bne LSYM(Lml_n) @ <anything> / NAN -> NAN
654 b LSYM(Lml_z) @ <anything> / INF -> 0
655 2: @ One or both arguments are 0.
656 bics r2, r0, #0x80000000
657 bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
658 bics r3, r1, #0x80000000
659 bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
660 b LSYM(Lml_n) @ 0 / 0 -> NAN
675 ARM_FUNC_START cmpsf2
@ -----------------------------------------------------------------------
@ cmpsf2 fragment: three-way compare of two single-precision floats
@ (r0 ? r1); r3 holds the value to return for unordered (NAN) operands.
@ NOTE(review): the decimal prefix on each line is the ORIGINAL file's
@ line number; some instructions between the visible ones are missing
@ from this extract.
@ -----------------------------------------------------------------------
676 mov r3, #1 @ how should we specify unordered here?
678 1: @ Trap any INF/NAN first.
@ NOTE(review): ip presumably holds 0xff000000 here (set off-screen);
@ ip lsr #1 extracts the exponent for the INF/NAN tests.
680 and r2, r1, ip, lsr #1
682 and r2, r0, ip, lsr #1
687 @ Note that 0.0 is equal to -0.0.
689 bics r3, r3, #0x80000000 @ either 0.0 or -0.0
690 teqne r0, r1 @ or both the same
694 @ Check for sign difference. The N flag is set if it is the case.
695 @ If so, return sign of r0.
@ asr #31 smears the sign bit across the word (0 or -1); the rest of
@ the sign-difference sequence is not visible in this extract.
696 movmi r0, r0, asr #31
701 and r3, r1, ip, lsr #1
704 @ Compare mantissa if exponents are equal
707 movcs r0, r1, asr #31
708 mvncc r0, r1, asr #31
@ NAN checks for each operand before falling back to the compare path.
713 3: and r2, r1, ip, lsr #1
718 4: and r2, r0, ip, lsr #1
722 beq 2b @ r0 is not NAN
723 5: mov r0, r3 @ return unordered code from r3.
727 ARM_FUNC_START unordsf2
@ -----------------------------------------------------------------------
@ unordsf2 fragment: return 1 in r0 if either float operand is a NAN,
@ 0 if both are ordered.
@ NOTE(review): the decimal prefix on each line is the ORIGINAL file's
@ line number; the tests between the visible lines are missing from
@ this extract.
@ -----------------------------------------------------------------------
@ NOTE(review): ip presumably holds 0xff000000 here (set off-screen);
@ ip lsr #1 extracts the exponent field for the NAN test.
729 and r2, r1, ip, lsr #1
734 1: and r2, r0, ip, lsr #1
739 2: mov r0, #0 @ arguments are ordered.
741 3: mov r0, #1 @ arguments are unordered.
745 ARM_FUNC_START fixsfsi
@ -----------------------------------------------------------------------
@ fixsfsi fragment: convert the float in r0 to a signed 32-bit integer
@ in r0, saturating on overflow; NAN converts to 0.
@ NOTE(review): the decimal prefix on each line is the ORIGINAL file's
@ line number; some instructions between the visible ones are missing
@ from this extract.
@ -----------------------------------------------------------------------
747 RETc(eq) @ value is 0.
748 @ preserve C flag (the actual sign)
755 @ check exponent range.
756 and r2, r0, #0xff000000
758 movcc r0, #0 @ value is too small
@ Exponent >= 127 + 31 cannot fit in a signed 32-bit integer.
760 cmp r2, #((127 + 31) << 24)
761 bcs 1f @ value is too large
@ Re-insert the implicit leading 1 at the top of the mantissa.
764 orr r0, r0, #0x80000000
@ r2 := right-shift amount needed to align the integer part.
766 rsb r2, r2, #(127 + 31)
768 tst r1, #0x20000000 @ the sign bit
@ All-ones exponent: INF or NAN rather than a plain overflow.
772 1: teq r2, #0xff000000
@ Overflow: saturate according to the sign of the input.
776 2: tst r1, #0x20000000 @ the sign bit
777 moveq r0, #0x7fffffff @ the maximum signed positive si
778 movne r0, #0x80000000 @ the maximum signed negative si
781 3: mov r0, #0 @ What should we convert NAN to?
785 ARM_FUNC_START fixunssfsi
@ -----------------------------------------------------------------------
@ fixunssfsi fragment: convert the float in r0 to an unsigned 32-bit
@ integer in r0, saturating on overflow.
@ NOTE(review): the decimal prefix on each line is the ORIGINAL file's
@ line number; some instructions between the visible ones are missing
@ from this extract.
@ -----------------------------------------------------------------------
787 RETc(eq) @ value is 0.
788 @ C set here means the input sign bit was set (negative input).
789 RETc(cs) @ value is negative.
791 @ check exponent range.
792 and r2, r0, #0xff000000
794 movcc r0, #0 @ value is too small
@ Exponent >= 127 + 32 cannot fit in an unsigned 32-bit integer.
796 cmp r2, #((127 + 32) << 24)
797 bcs 1f @ value is too large
@ Re-insert the implicit leading 1 at the top of the mantissa.
800 orr r0, r0, #0x80000000
@ r2 := right-shift amount needed to align the integer part.
802 rsb r2, r2, #(127 + 31)
@ All-ones exponent: INF or NAN rather than a plain overflow.
806 1: teq r2, #0xff000000
@ Overflow: saturate to the maximum unsigned value.
810 2: mov r0, #0xffffffff @ maximum unsigned si