1 # ieee754 sf routines for FT32
3 /* Copyright (C) 1995-2017 Free Software Foundation, Inc.
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 # See http://www.ens-lyon.fr/LIP/Pub/Rapports/PhD/PhD2006/PhD2006-02.pdf
25 # for implementation details of all except division which is detailed below
# Shared 32-bit constants for the single-precision routines in this file.
30 nan: .long 0x7FFFFFFF # also abs mask (every bit except the sign bit, bit 31)
32 sign_mask: .long 0x80000000 # bit 31 only
33 m_mask: .long 0x007FFFFF # low 23 bits (the fraction field)
35 edge_case: .long 0x00FFFFFF # low 24 bits; the multiply comments name this value as the border case promoted to the smallest normalized number
36 smallest_norm: .long 0x00800000 # implicit bit (bit 23)
37 high_FF: .long 0xFF000000 # top 8 bits
38 high_uint: .long 0xFFFFFFFF # all 32 bits set
# Byte table with 64 entries (4 rows of 16).
# NOTE(review): the label of this table and the code that indexes it are not
# visible in this excerpt; presumably it is the lookup table for the
# trailing-zero-count macro that cites the Seal algorithm -- confirm.
41 .byte 32,0,1,12,2,6,0,13,3,0,7,0,0,0,0,14
42 .byte 10,4,0,0,8,0,0,25,0,0,0,0,0,21,27,15
43 .byte 31,11,5,0,0,0,0,0,9,0,0,24,0,0,20,26
44 .byte 30,0,0,0,0,23,0,19,29,0,22,18,28,17,16,0
48 # Supply a few 'missing' instructions
61 # set $cc from the result of "ashl reg,dist"
# The instruction is emitted as a raw 32-bit word: reg is shifted into place
# at bit 15 and dist at bit 4, both OR-ed into the base word 0x5de04008.
# Both arguments therefore have to be expressions the assembler can evaluate
# to a number (presumably a register number rather than a $rN name -- confirm
# at the call sites, which are not visible in this excerpt).
62 .macro ashlcc reg,dist
63 .long 0x5de04008 | (\reg << 15) | (\dist << 4)
67 # converts an unsigned number x to a signed rep based on the bits in sign
68 # sign should be 0x00000000 or 0xffffffff.
# With sign = 0 both instructions leave x unchanged. With sign = 0xffffffff
# the add gives x - 1 and the xor complements it, which is -x in 32-bit
# arithmetic. The register passed as x is modified in place; sign is only read.
69 .macro to_signed x, sign
70 add \x,\x,\sign # conditionally decrement x
71 xor \x,\x,\sign # conditionally complement x
80 # calculate trailing zero count in x, also uses scr.
81 # Using Seal's algorithm
98 # calculate leading zero count
105 # Round 26 bit mantissa to nearest
106 # | 23 bits frac | G | R | S |
# Arguments: m is the mantissa register in the layout drawn above.
# NOTE(review): s1 and s2 are presumably scratch registers; the macro body is
# not visible in this excerpt, so confirm before relying on that.
107 .macro round m, s1, s2
116 # If NZ, set the LSB of reg
# NOTE(review): the macro header and the conditional branch that skips the
# or when the flags say zero are not visible in this excerpt.
119 or \reg,\reg,1 # set the sticky bit to 1
123 ##########################################################################
124 ##########################################################################
125 ## addition & subtraction
127 #if defined(L_subsf3) || defined(L_addsub_sf)
# Subtraction entry, assembled only when L_subsf3 or L_addsub_sf is defined.
# NOTE(review): the entry label and the instruction that flips the sign are
# not visible in this excerpt; presumably control then reaches the addition
# code that follows -- confirm.
130 # this is subtraction, so we just change the sign of r1
136 #if defined(L_addsf3) || defined(L_addsub_sf)
# Single-precision addition, assembled when L_addsf3 or L_addsub_sf is defined.
# Register roles, as stated by the comments below:
#   $r0 = x and the result z, $r1 = y, $r2 = ex, $r3 = ey, $r5 = d = ex - ey.
# Field operands of the form (width<<5)|pos are used throughout:
#   (8<<5)|23 is the 8-bit exponent field at bit 23,
#   (1<<5)|31 is the 1-bit sign field at bit 31.
139 # x in $r0, y in $r1, result z in $r0 --||| 100 instructions +/- |||--
141 bextu $r2,$r0,(8<<5)|23 # ex in r2
142 bextu $r3,$r1,(8<<5)|23 # ey in r3
143 sub $r5,$r2,$r3 # d = ex - ey
145 # Special values are 0x00 and 0xff in ex and ey.
146 # If (ex&ey) != 0 or (ex|ey)=255 then there may be
153 jmpc nz,no_special_vals
155 # Check for early exit
157 jmpc z,test_if_not_255
159 jmpc nz,no_early_exit
180 # setup to test for special values
185 # test for special values
187 jmpc gte,ex_spec_is_gte
191 jmpc nz,no_special_vals
210 jmpc nz,no_special_vals
211 ashl $r6,$r0,9 # clear all except x frac (shifts out the sign and exponent bits)
212 ashl $r7,$r1,9 # clear all except y frac (shifts out the sign and exponent bits)
216 lshr $r4,$r0,31 # sx in r4
217 lshr $r5,$r1,31 # sy in r5
# NOTE(review): the constant below looks like (value<<10)|(width<<5)|pos, i.e.
# value 1 in a 9-bit field at bit 26, which is bit 23 plus the 3-bit GRS shift
# applied to the mantissas -- confirm against the bins operand encoding.
225 ldk $r8,(1<<10)|(9<<5)|26 # setup implicit bit and mask for e
226 #----------------------
227 ashr $r4,$r0,31 # sx in r4 (0 or 0xffffffff, the form to_signed expects)
228 ashl $r0,$r0,3 # shift mx 3 for GRS bits
229 bins $r0,$r0,$r8 # clear sx, ex and add implicit bit mx
230 # change mx to signed mantissa
232 #----------------------
233 ashr $r4,$r1,31 # sy in r4 (0 or 0xffffffff; $r4 is reused, sx is no longer needed here)
234 ashl $r1,$r1,3 # shift my 3 for GRS bits
235 bins $r1,$r1,$r8 # clear sy, ey and add implicit bit my
236 # change my to signed mantissa
238 #----------------------
239 # test if we swap ms based on d sign
246 # d positive means that ex>=ey, so ez = ex
247 # d negative means that ey>ex, so ez = ey
252 # now $r2 = ez = max(ex,ey)
253 cmp $r5,26 # max necessary alignment shift is 26
258 ashl $r7,$r7,$r5 # create inverse of mask for test of S bit value in discarded my
260 tst $r1,$r7 # determine value of sticky bit
268 # $r4 = sign(mx), mx = |mx|
273 # realign mantissa using leading zero count
277 btst $r0,(6<<5)|0 # test low bits for sticky again (6-bit field at bit 0)
293 # mz == 0? if so, we just bail with a +0
295 jmpc nz,msum_not_zero
299 # Combined check that (1 <= ez <= 254)
302 jmpc b,no_special_ret
311 jmpc lt,no_special_ret
317 ldl $r2,$r2,(8<<5)|23 # build the bins operand for ez (presumably (ez<<10)|(8<<5)|23 -- confirm ldl semantics)
318 bins $r0,$r0,$r2 # width = 8, pos = 23 pack ez
321 ldl $r4,$r4,(1<<5)|31 # build the bins operand for the sign in the same way
322 bins $r0,$r0,$r4 # width = 1, pos = 31 set sz to sy
326 ##########################################################################
327 ##########################################################################
333 # x in $r0, y in $r1, result z in $r0 --||| 61 instructions +/- |||--
# Single-precision multiplication (entry label not visible in this excerpt).
# Register roles, as stated by the comments below:
#   $r2 = ex, $r3 = ey and later ez, $r4 = sz, $r5 = bins operand for the
#   implicit bit, ($r1,$r2) = 48-bit mantissa product.
336 bextu $r2,$r0,(8<<5)|23 # ex in r2
337 bextu $r3,$r1,(8<<5)|23 # ey in r3
341 and $r4,$r4,$r5 # sz in r4
343 # unpack m add implicit bit
# NOTE(review): the constant below looks like (value<<10)|(width<<5)|pos, i.e.
# value 1 in a 9-bit field at bit 23 -- confirm against the bins encoding.
344 ldk $r5,(1<<10)|(9<<5)|23 # setup implicit bit and mask for e
345 #----------------------
346 bins $r0,$r0,$r5 # clear sx, ex and add implicit bit mx
354 jmpc b,no_special_vals_mul
357 # Check for early exit
361 jmpc nz,no_early_exit_mul
364 jmpc z,no_early_exit_mul
366 jmpc z,no_early_exit_mul
370 # setup to test for special values
375 # test for special values
377 jmpc gte,ex_spec_is_gte_ey_mul
379 ex_spec_is_gte_ey_mul:
381 jmpc nz,no_special_vals_mul
383 jmpc nz,ex_not_FF_mul
416 bins $r1,$r1,$r5 # clear sy, ey and add implicit bit my
419 sub $r3,$r3,127 # ez in r3 (127 is the single-precision exponent bias)
425 btst $r1,(1<<5)|15 # XXX use jmpx
429 # 48-bit product is in (r1,r2). The low 22 bits of r2
433 or $r0,$r0,$r1 # r0 = (r1,r2) >> 22
436 add $r3,$r3,1 # bump exponent
445 jmpc b,no_special_ret_mul
448 # When the final exponent <= 0, result is flushed to 0 except
449 # for the border case 0x00FFFFFF which is promoted to next higher
450 # FP no., that is, the smallest "normalized" number.
454 ldl $r3,$r3,(8<<5)|23 # build the bins operand for ez (presumably (ez<<10)|(8<<5)|23 -- confirm ldl semantics)
455 bins $r0,$r0,$r3 # width = 8, pos = 23 pack ez
459 lpm $r0,smallest_norm # result = 0x00800000, the smallest normalized number
467 jmpc lt,no_special_ret_mul
473 ldl $r3,$r3,(8<<5)|23 # same operand construction as the pack above
474 bins $r0,$r0,$r3 # width = 8, pos = 23 pack ez
482 # 48-bit product is in (r1,r2). The low 21 bits of r2
486 or $r0,$r0,$r1 # r0 = (r1,r2) >> 22
# NOTE(review): the comment two lines up says 21 bits are discarded while the
# comment on the or says >> 22. The shift instructions are not visible in this
# excerpt; one of the two comments is probably stale -- confirm which.
496 jmpc b,no_special_ret_mul
500 ##########################################################################
501 ##########################################################################
504 ## See http://perso.ens-lyon.fr/gilles.villard/BIBLIOGRAPHIE/PDF/arith19.pdf
505 ## for implementation details
# Constants for the division routine. Each value is quoted in the step
# comments (i0 .. i22, W) of the division code further down in this file.
511 dc_1: .long 0xffffe7d7 # multiplier of T in step i0
512 dc_2: .long 0xffffffe8 # minuend in step i1
513 dc_3: .long 0xffbad86f # multiplier of T in step i6
514 dc_4: .long 0xfffbece7 # minuend in step i7
515 dc_5: .long 0xf3672b51 # multiplier of T in step i11
516 dc_6: .long 0xfd9d3a3e # minuend in step i12
517 dc_7: .long 0x9a3c4390 # multiplier of T in step i13
518 dc_8: .long 0xd4d2ce9b # minuend in step i14
519 dc_9: .long 0x1bba92b3 # multiplier of T in step i20
520 dc_10: .long 0x525a1a8b # minuend in step i21
521 dc_11: .long 0x0452b1bf # multiplier of i4 in step i22
522 dc_12: .long 0xFFFFFFC0 # mask applied to V to form W (clears the low 6 bits)
523 spec_val_test: .long 0x7F7FFFFF # exponent field 0xFE with all 23 fraction bits set; NOTE(review): its use is not visible in this excerpt
528 # x in $r0, y in $r1, result z in $r0 --||| 73 instructions +/- |||-
# Single-precision division (entry label not visible in this excerpt).
# The trailing comments on the arithmetic lines quote the C statements of the
# reference algorithm (c, T, S, D, Sr, i0 .. i25, V, W); the register named on
# each instruction is the one that holds that intermediate.
# NOTE(review): the loads that put each polynomial constant into $r6 (and the
# masks into $r6/$r13) are not visible in this excerpt; every muluh/sub below
# that reads $r6 relies on such a load immediately before it.
529 bextu $r10,$r0,(8<<5)|23 # ex in r10
530 bextu $r11,$r1,(8<<5)|23 # ey in r11
532 and $r2, $r0, $r6 # mx
533 and $r3, $r1, $r6 # my
# The bextu below reads one bit (width 1, position 4) of $r30 -- presumably a
# flag left by a compare of mx with my that is not visible here.
535 bextu $r2,$r30,(1<<5)|4 # c = Tx >= T;
# NOTE(review): $r3 was derived from y ($r1) above, so T looks like it comes
# from the mantissa of Y rather than X -- confirm against the referenced paper.
536 ashl $r3,$r3,9 # T = X << 9;
538 ashl $r4,$r0,8 # X8 = X << 8;
539 or $r4,$r4,$r13 # Mx = X8 | 0x80000000;
540 lshr $r5,$r4,$r2 # S = Mx >> c;
544 sub $r2, $r12, $r2 # int D = (Ex + 125) - (Ey - c);
547 and $r12,$r12,$r13 # Sr = ( X ^ Y ) & 0x80000000;
550 jmpc nz, no_early_ret_dev
552 jmpc z, no_early_ret_dev
554 jmpc z, no_early_ret_dev
559 # setup to test for special values
564 # test for special values
566 jmpc gte, absXm1_gte_absYm1
570 jmpc nz, no_spec_ret_div
572 jmpc nz, ex_not_FF_div
574 and $r2, $r0, $r6 # mx
582 jmpc nz, ey_not_FF_div
608 jmpc lt, no_overflow_div
614 # check for underflow
616 jmpc ns, no_underflow_div
617 xnor $r6, $r6, $r6 # -1
621 xor $r6, $r6, $r7 # 0xFF ^ -1 = 0xFFFFFF00
# Polynomial evaluation. Register use visible below:
#   $r3 = T, $r5 = S, $r8 = i4 = mul(T,T), $r9 = i5 then i10,
#   $r7 = running sum (i3, i9, i18), $r10 and $r11 = temporaries.
637 muluh $r7, $r3, $r6 # i0 = mul( T , 0xffffe7d7 );
639 sub $r7, $r6, $r7 # i1 = 0xffffffe8 - i0;
640 muluh $r7, $r5, $r7 # i2 = mul( S , i1 );
641 add $r7, $r7, 0x20 # i3 = 0x00000020 + i2;
642 muluh $r8, $r3, $r3 # i4 = mul( T , T );
643 muluh $r9, $r5, $r8 # i5 = mul( S , i4 );
645 muluh $r10, $r3, $r6 # i6 = mul( T , 0xffbad86f );
647 sub $r10, $r6, $r10 # i7 = 0xfffbece7 - i6;
648 muluh $r10, $r9, $r10 # i8 = mul( i5 , i7 );
649 add $r7, $r7, $r10 # i9 = i3 + i8;
650 muluh $r9, $r8, $r9 # i10 = mul( i4 , i5 );
652 muluh $r10, $r3, $r6 # i11 = mul( T , 0xf3672b51 );
654 sub $r10, $r6, $r10 # i12 = 0xfd9d3a3e - i11;
656 muluh $r11, $r3, $r6 # i13 = mul( T , 0x9a3c4390 );
658 sub $r11, $r6, $r11 # i14 = 0xd4d2ce9b - i13
659 muluh $r11, $r8, $r11 # i15 = mul( i4 , i14 );
660 add $r10, $r10, $r11 # i16 = i12 + i15;
661 muluh $r10, $r9, $r10 # i17 = mul( i10 , i16 )
662 add $r7, $r7, $r10 # i18 = i9 + i17;
663 muluh $r10, $r8, $r8 # i19 = mul( i4 , i4 );
665 muluh $r11, $r3, $r6 # i20 = mul( T , 0x1bba92b3 );
667 sub $r11, $r6, $r11 # i21 = 0x525a1a8b - i20;
669 muluh $r8, $r8, $r6 # i22 = mul( i4 , 0x0452b1bf );
670 add $r8, $r11, $r8 # i23 = i21 + i22;
671 muluh $r8, $r10, $r8 # i24 = mul( i19 , i23 );
672 muluh $r8, $r9, $r8 # i25 = mul( i10 , i24 );
673 add $r3, $r7, $r8 # V = i18 + i25;
674 # W = V & 0xFFFFFFC0;
676 and $r3, $r3, $r6 # W
677 # round and pack final values
678 ashl $r0, $r2, 23 # pack D (moves D into the exponent field position)
679 or $r0, $r0, $r12 # pack Sr
681 or $r12, $r12, $r13 # My
682 muluh $r10, $r3, $r12
694 ##########################################################################
695 ##########################################################################
706 ##########################################################################
707 ##########################################################################
708 ## float to int & unsigned int
# Float to signed int conversion (see the section heading above).
# Input: single-precision bit pattern in $r0. The visible instructions split
# it into e ($r1), s ($r2) and m ($r0), range-check it, and restore the
# implicit bit before the mantissa is used.
# NOTE(review): the result is presumably returned in $r0; the final shift and
# return are not visible in this excerpt.
712 __fixsfsi: # 20 instructions
713 bextu $r1,$r0,(8<<5)|23 # e in r1 (8-bit field at bit 23)
714 lshr $r2,$r0,31 # s in r2 (0 or 1)
716 and $r0,$r0,$r3 # m in r0
727 jmpc gte, int_not_zero # lower limit
732 jmpc lt, int_not_max # upper limit
739 lpm $r3, smallest_norm # 0x00800000
740 or $r0, $r0, $r3 # set implicit bit
# Float to unsigned int conversion (see the section heading above).
# Input: single-precision bit pattern in $r0. The visible instructions split
# it into s ($r2), e ($r1) and m ($r0), branch around the NaN and lower-limit
# cases, restore the implicit bit, and pick the shift direction.
# NOTE(review): the result is presumably returned in $r0; the shifts and the
# return are not visible in this excerpt.
761 __fixunssfsi: # 19 instructions
762 lshr $r2, $r0, 31 # s in r2 (0 or 1)
768 bextu $r1, $r0, (8<<5)|23 # e in r1 (8-bit field at bit 23)
771 and $r0, $r0, $r3 # m in r0
774 jmpc nz, uint_not_nan
782 jmpc ns, uint_not_zero # lower limit
786 lpm $r3, smallest_norm # 0x00800000
787 or $r0, $r0, $r3 # set implicit bit
789 jmpc lt, shift_uint_right
800 ##########################################################################
801 ##########################################################################
802 ## int & unsigned int to float
# Shared body of the integer to float conversions.
# Arguments:
#   x          register holding the magnitude; modified in place
#   s1, s2, s3 registers overwritten by the macro (s1 is described below as
#              the leading-zero count that stands in for the exponent)
#   lbl        suffix appended to every local label, so that each expansion
#              gets its own label names (the two users pass 1 and 2)
# NOTE(review): several instructions of the body, including the shifts and
# the closing directive, are not visible in this excerpt.
805 .macro i2f x, s1, s2, s3, lbl
809 jmpc s, float_round\lbl
812 jmp float_no_round\lbl
815 jmpc s, float_shift_right\lbl
818 jmp float_round_and_pack\lbl
819 float_shift_right\lbl:
822 xnor \s3, \s3 ,\s3 # 0xFFFFFFFF
823 ashl \s3, \s3 ,\s2 # create inverse of mask for test of S bit value in the low s2 bits of x
824 xnor \s3, \s3 ,0 # NOT
825 tst \x, \s3 # determine value of sticky bit
827 jmpc z,float_round_and_pack\lbl
828 or \x, \x, 1 # set the sticky bit to 1
829 float_round_and_pack\lbl:
830 bextu \s2, \x, (1<<5)|2 # extract low bit of m (1-bit field at bit 2)
831 or \x, \x, \s2 # or p into r
834 btst \x, (1<<5)|24 # test for carry from round
835 jmpc z, float_no_round\lbl
836 sub \s1, \s1, 1 # inc e for carry (actually dec nlz)
842 ldl \s1, \s1, (8<<5)|23 # build the operand for an 8-bit field at bit 23 (presumably consumed by a bins that is not visible here)
# Signed int to float conversion (see the section heading above).
# Input: integer in $r0. The sign is split off into $r1, the magnitude is
# converted by the i2f macro with $r2, $r3, $r4 as its working registers, and
# the sign is then prepared for insertion at bit 31.
# NOTE(review): the zero test before the first branch, the second half of the
# conditional negate and the final sign insertion are not visible here.
849 __floatsisf: # 32 instructions
851 jmpc nz, float_not_zero
854 ashr $r1, $r0, 31 # s in r1 (0 or 0xffffffff)
855 xor $r0, $r0, $r1 # cond neg (this xor is the conditional complement step)
857 i2f $r0, $r2, $r3, $r4, 1
858 ldl $r1, $r1, (1<<5)|31 # build the operand for a 1-bit field at bit 31 (the sign position)
# Unsigned int to float conversion (see the section heading above).
# Input: integer in $r0, converted by the i2f macro with $r1, $r2, $r3 as its
# working registers and label suffix 2. No sign handling is visible here.
# NOTE(review): the zero test before the branch and the return are not
# visible in this excerpt.
864 .global __floatunsisf
865 __floatunsisf: # 26 instructions
867 jmpc nz, float_not_zero2
870 i2f $r0, $r1, $r2, $r3, 2
875 ##########################################################################
876 ##########################################################################
# NOTE(review): the lines below are isolated fragments of several routines
# whose entry labels are not visible in this excerpt. The notes added here
# describe only what each visible instruction does.
882 lpm $r3, nan # also abs mask (0x7FFFFFFF)
885 # test if either abs is nan
898 # -- if either is pos
930 lshr $r3,$r0,31 # Shift $r2:$r0 left one (this instruction captures bit 31 of $r0 in $r3)
959 xor $r5,$r0,$r1 # $r5 is sign of result (bit 31 is set when the operand signs differ)
960 ashr $r2,$r0,31 # $r0 = abs($r0) (this instruction only forms the 0 / 0xffffffff sign mask in $r2)
963 ashr $r2,$r1,31 # $r1 = abs($r1) (sign mask of $r1 in $r2)
977 move $r5,$r0 # $r5 is sign of result (copied from the first operand)
978 ashr $r2,$r0,31 # $r0 = abs($r0) (this instruction only forms the 0 / 0xffffffff sign mask in $r2)
981 ashr $r2,$r1,31 # $r1 = abs($r1) (sign mask of $r1 in $r2)