1 /* ieee754-sf.S single-precision floating point support for ARM
3 Copyright (C) 2003 Free Software Foundation, Inc.
4 Contributed by Nicolas Pitre (nico@cam.org)
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 In addition to the permissions in the GNU General Public License, the
12 Free Software Foundation gives you unlimited permission to link the
13 compiled version of this file into combinations with other programs,
14 and to distribute those combinations without any restriction coming
15 from the use of this file. (The General Public License restrictions
16 do apply in other respects; for example, they cover modification of
17 the file, and distribution when not linked into a combine
20 This file is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; see the file COPYING. If not, write to
27 the Free Software Foundation, 59 Temple Place - Suite 330,
28 Boston, MA 02111-1307, USA. */
33 * The goal of this code is to be as fast as possible. This is
34 * not meant to be easy to understand for the casual reader.
36 * Only the default rounding mode is intended for best performances.
37 * Exceptions aren't supported yet, but that can be added quite easily
38 * if necessary without impacting performances.
44 eor r0, r0, #0x80000000 @ flip sign bit
54 eor r1, r1, #0x80000000 @ flip sign bit of second arg
55 #if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
56 b 1f @ Skip Thumb-code prologue
61 1: @ Compare both args, return zero if equal but the sign.
66 @ If first arg is 0 or -0, return second arg.
67 @ If second arg is 0 or -0, return first arg.
68 bics r2, r0, #0x80000000
70 bicnes r2, r1, #0x80000000
75 and r2, r0, ip, lsr #1
76 and r3, r1, ip, lsr #1
78 @ If either of them is 255, result will be INF or NAN
83 @ Compute exponent difference. Make largest exponent in r2,
84 @ corresponding arg in r0, and positive exponent difference in r3.
92 @ If exponent difference is too large, return largest argument
93 @ already in r0. We need up to 25 bit to handle proper rounding
98 @ Convert mantissa to signed integer.
100 orr r0, r0, #0x00800000
101 bic r0, r0, #0xff000000
104 orr r1, r1, #0x00800000
105 bic r1, r1, #0xff000000
108 @ If exponent == difference, one or both args were denormalized.
109 @ Since this is not common case, rescale them off line.
114 @ Scale down second arg with exponent difference.
115 @ Apply shift one bit left to first arg and the rest to second arg
116 @ to simplify things later, but only if exponent does not become 0.
120 subne r2, r2, #(1 << 23)
123 @ Shift second arg into ip, keep leftover bits into r1.
128 add r0, r0, ip @ the actual addition
130 @ We now have a 64 bit result in r0-r1.
131 @ Keep absolute value in r0-r1, sign in r3.
132 ands r3, r0, #0x80000000
137 @ Determine how to normalize the result.
146 @ Result needs to be shifted right.
149 add r2, r2, #(1 << 23)
153 add r2, r2, #(1 << 23)
155 @ Our result is now properly aligned into r0, remaining bits in r1.
156 @ Round with MSB of r1. If halfway between two numbers, round towards
159 add r0, r0, r1, lsr #31
163 @ Rounding may have added a new MSB. Adjust exponent.
164 @ That MSB will be cleared when exponent is merged below.
166 addne r2, r2, #(1 << 23)
168 @ Make sure we did not bust our exponent.
172 @ Pack final result together.
174 bic r0, r0, #0x01800000
179 @ Result must be shifted left.
180 @ No rounding necessary since r1 will always be 0.
186 moveq r0, r0, lsl #12
187 subeq r2, r2, #(12 << 23)
190 subeq r2, r2, #(8 << 23)
193 subeq r2, r2, #(4 << 23)
196 subeq r2, r2, #(2 << 23)
199 subeq r2, r2, #(1 << 23)
208 subs r2, r2, ip, lsl #23
213 @ Exponent too small, denormalize result.
216 orr r0, r3, r0, lsr r2
219 @ Fixup and adjust bit position for denormalized arguments.
220 @ Note that r2 must not remain equal to 0.
223 eoreq r0, r0, #0x00800000
224 addeq r2, r2, #(1 << 23)
225 eor r1, r1, #0x00800000
226 subne r3, r3, #(1 << 23)
229 @ Result is x - x = 0, unless x is INF or NAN.
232 and r2, r0, ip, lsr #1
238 @ Overflow: return INF.
240 orr r0, r3, #0x7f000000
241 orr r0, r0, #0x00800000
244 @ At least one of r0/r1 is INF/NAN.
245 @ if r0 != INF/NAN: return r1 (which is INF/NAN)
246 @ if r1 != INF/NAN: return r0 (which is INF/NAN)
247 @ if r0 or r1 is NAN: return NAN
248 @ if opposite sign: return NAN
249 @ return r0 (which is INF or -INF)
256 moveqs r2, r1, lsl #9
258 orrne r0, r3, #0x00400000 @ NAN
264 ARM_FUNC_START floatunsisf
268 ARM_FUNC_START floatsisf
269 ands r3, r0, #0x80000000
276 mov r2, #((127 + 23) << 23)
280 @ We need to scale the value a little before branching to code above.
282 movne r1, r0, lsl #28
284 addne r2, r2, #(4 << 23)
288 orr r1, r1, r0, lsl #30
290 add r2, r2, #(2 << 23)
296 #endif /* L_addsubsf3 */
300 ARM_FUNC_START mulsf3
302 @ Mask out exponents.
304 and r2, r0, ip, lsr #1
305 and r3, r1, ip, lsr #1
312 @ Trap any multiplication by 0.
313 bics ip, r0, #0x80000000
314 bicnes ip, r1, #0x80000000
317 @ Shift exponents right one bit to make room for overflow bit.
318 @ If either of them is 0, scale denormalized arguments off line.
319 @ Then add both exponents together.
324 add r2, r2, r3, asr #1
326 @ Preserve final sign in r2 along with exponent for now.
328 orrmi r2, r2, #0x8000
330 @ Convert mantissa to unsigned integer.
331 bic r0, r0, #0xff000000
332 bic r1, r1, #0xff000000
333 orr r0, r0, #0x00800000
334 orr r1, r1, #0x00800000
338 @ Well, no way to make it shorter without the umull instruction.
339 @ We must perform that 24 x 24 -> 48 bit multiplication by hand.
343 bic r0, r0, #0x00ff0000
344 bic r1, r1, #0x00ff0000
349 adds r3, r3, r0, lsl #16
350 adc ip, ip, r0, lsr #16
355 umull r3, ip, r0, r1 @ The actual multiplication.
359 @ Put final sign in r0.
363 @ Adjust result if one extra MSB appeared.
364 @ The LSB may be lost but this never changes the result in this case.
366 addne r2, r2, #(1 << 22)
367 movnes ip, ip, lsr #1
370 @ Apply exponent bias, check range for underflow.
371 subs r2, r2, #(127 << 22)
374 @ Scale back to 24 bits with rounding.
375 @ r0 contains sign bit already.
376 orrs r0, r0, r3, lsr #23
377 adc r0, r0, ip, lsl #9
379 @ If halfway between two numbers, rounding should be towards LSB = 0.
384 @ Note: rounding may have produced an extra MSB here.
385 @ The extra bit is cleared before merging the exponent below.
387 addne r2, r2, #(1 << 22)
389 @ Check for exponent overflow
393 @ Add final exponent.
394 bic r0, r0, #0x01800000
395 orr r0, r0, r2, lsl #1
398 @ Result is 0, but determine sign anyway.
401 bic r0, r0, #0x7fffffff
404 @ Check if denormalized result is possible, otherwise return signed 0.
409 @ Find out proper shift value.
414 @ Shift value left, round, etc.
416 orrs r0, r0, r3, lsr r1
418 adc r0, r0, ip, lsl r1
424 @ Shift value right, round, etc.
425 @ Note: r1 must not be 0 otherwise carry does not get set.
427 orrs r0, r0, ip, lsr r1
432 teqeq ip, #0x80000000
436 @ One or both arguments are denormalized.
437 @ Scale them leftwards and preserve sign bit.
440 and ip, r0, #0x80000000
441 1: moveq r0, r0, lsl #1
442 tsteq r0, #0x00800000
443 subeq r2, r2, #(1 << 22)
447 and ip, r1, #0x80000000
448 2: moveq r1, r1, lsl #1
449 tsteq r1, #0x00800000
450 subeq r3, r3, #(1 << 23)
455 @ One or both args are INF or NAN.
459 teqne r0, #0x80000000
460 teqne r1, #0x80000000
461 beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
465 bne LSYM(Lml_n) @ NAN * <anything> -> NAN
466 1: teq r3, ip, lsr #1
469 bne LSYM(Lml_n) @ <anything> * NAN -> NAN
471 @ Result is INF, but we need to determine its sign.
475 @ Overflow: return INF (sign already in r0).
477 and r0, r0, #0x80000000
478 orr r0, r0, #0x7f000000
479 orr r0, r0, #0x00800000
485 orr r0, r0, #0x00c00000
490 ARM_FUNC_START divsf3
492 @ Mask out exponents.
494 and r2, r0, ip, lsr #1
495 and r3, r1, ip, lsr #1
497 @ Trap any INF/NAN or zeroes.
500 bicnes ip, r0, #0x80000000
501 bicnes ip, r1, #0x80000000
504 @ Shift exponents right one bit to make room for overflow bit.
505 @ If either of them is 0, scale denormalized arguments off line.
506 @ Then substract divisor exponent from dividend''s.
511 sub r2, r2, r3, asr #1
513 @ Preserve final sign into ip.
516 @ Convert mantissa to unsigned integer.
517 @ Dividend -> r3, divisor -> r1.
522 orr r1, r3, r1, lsr #4
523 orr r3, r3, r0, lsr #4
525 @ Initialize r0 (result) with final sign bit.
526 and r0, ip, #0x80000000
528 @ Ensure result will land to known bit position.
530 subcc r2, r2, #(1 << 22)
533 @ Apply exponent bias, check range for over/underflow.
534 add r2, r2, #(127 << 22)
540 @ The actual division loop.
546 subcs r3, r3, r1, lsr #1
547 orrcs r0, r0, ip, lsr #1
549 subcs r3, r3, r1, lsr #2
550 orrcs r0, r0, ip, lsr #2
552 subcs r3, r3, r1, lsr #3
553 orrcs r0, r0, ip, lsr #3
555 movnes ip, ip, lsr #4
558 @ Check if denormalized result is needed.
562 @ Apply proper rounding.
567 @ Add exponent to result.
568 bic r0, r0, #0x00800000
569 orr r0, r0, r2, lsl #1
572 @ Division by 0x1p*: let''s shortcut a lot of code.
574 and ip, ip, #0x80000000
575 orr r0, ip, r0, lsr #9
576 add r2, r2, #(127 << 22)
580 orrgt r0, r0, r2, lsl #1
585 orr r0, r0, #0x00800000
588 @ Result must be denormalized: prepare parameters to use code above.
589 @ r3 already contains remainder for rounding considerations.
591 bic ip, r0, #0x80000000
592 and r0, r0, #0x80000000
597 @ One or both arguments are denormalized.
598 @ Scale them leftwards and preserve sign bit.
601 and ip, r0, #0x80000000
602 1: moveq r0, r0, lsl #1
603 tsteq r0, #0x00800000
604 subeq r2, r2, #(1 << 22)
608 and ip, r1, #0x80000000
609 2: moveq r1, r1, lsl #1
610 tsteq r1, #0x00800000
611 subeq r3, r3, #(1 << 23)
616 @ One or both arguments is either INF, NAN or zero.
621 beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
625 bne LSYM(Lml_n) @ NAN / <anything> -> NAN
626 b LSYM(Lml_i) @ INF / <anything> -> INF
627 1: teq r3, ip, lsr #1
630 bne LSYM(Lml_n) @ <anything> / NAN -> NAN
631 b LSYM(Lml_z) @ <anything> / INF -> 0
632 2: @ One or both arguments are 0.
633 bics r2, r0, #0x80000000
634 bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
635 bics r3, r1, #0x80000000
636 bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
637 b LSYM(Lml_n) @ 0 / 0 -> NAN
641 #endif /* L_muldivsf3 */
657 ARM_FUNC_START cmpsf2
658 mov r3, #1 @ how should we specify unordered here?
660 1: @ Trap any INF/NAN first.
662 and r2, r1, ip, lsr #1
664 and r2, r0, ip, lsr #1
669 @ Note that 0.0 is equal to -0.0.
671 bics r3, r3, #0x80000000 @ either 0.0 or -0.0
672 teqne r0, r1 @ or both the same
676 @ Check for sign difference. The N flag is set if it is the case.
677 @ If so, return sign of r0.
678 movmi r0, r0, asr #31
683 and r3, r1, ip, lsr #1
686 @ Compare mantissa if exponents are equal
689 movcs r0, r1, asr #31
690 mvncc r0, r1, asr #31
695 3: and r2, r1, ip, lsr #1
700 4: and r2, r0, ip, lsr #1
704 beq 2b @ r0 is not NAN
705 5: mov r0, r3 @ return unordered code from r3.
716 #endif /* L_cmpsf2 */
720 ARM_FUNC_START unordsf2
722 and r2, r1, ip, lsr #1
727 1: and r2, r0, ip, lsr #1
732 2: mov r0, #0 @ arguments are ordered.
734 3: mov r0, #1 @ arguments are unordered.
739 #endif /* L_unordsf2 */
743 ARM_FUNC_START fixsfsi
745 RETc(eq) @ value is 0.
747 mov r1, r1, rrx @ preserve C flag (the actual sign)
749 @ check exponent range.
750 and r2, r0, #0xff000000
752 movcc r0, #0 @ value is too small
754 cmp r2, #((127 + 31) << 24)
755 bcs 1f @ value is too large
758 orr r0, r0, #0x80000000
760 rsb r2, r2, #(127 + 31)
761 tst r1, #0x80000000 @ the sign bit
766 1: teq r2, #0xff000000
770 2: ands r0, r1, #0x80000000 @ the sign bit
771 moveq r0, #0x7fffffff @ the maximum signed positive si
774 3: mov r0, #0 @ What should we convert NAN to?
779 #endif /* L_fixsfsi */
783 ARM_FUNC_START fixunssfsi
785 movcss r0, #0 @ value is negative...
789 @ check exponent range.
790 and r2, r0, #0xff000000
792 movcc r0, #0 @ value is too small
794 cmp r2, #((127 + 32) << 24)
795 bcs 1f @ value is too large
798 orr r0, r0, #0x80000000
800 rsb r2, r2, #(127 + 31)
804 1: teq r2, #0xff000000
808 2: mov r0, #0xffffffff @ maximum unsigned si
811 3: mov r0, #0 @ What should we convert NAN to?
816 #endif /* L_fixunssfsi */