1 /* Copyright
(C
) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3 Free Software Foundation
, Inc.
5 This file is free software
; you can redistribute it and/or modify it
6 under the terms of the GNU General
Public License as published by the
7 Free Software Foundation
; either version 2, or (at your option) any
10 In addition to the permissions
in the GNU General
Public License
, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of
this file
into combinations with other programs
,
13 and to distribute those combinations without any restriction coming
14 from the use of
this file.
(The General
Public License restrictions
15 do apply
in other respects
; for example, they cover modification of
16 the file
, and distribution when
not linked
into a combine
19 This file is distributed
in the hope that it will be useful
, but
20 WITHOUT ANY WARRANTY
; without even the implied warranty of
21 MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General
Public License for more details.
24 You should have received a copy of the GNU General
Public License
25 along with
this program
; see the file COPYING. If not, write to
26 the Free Software Foundation
, 51 Franklin Street
, Fifth Floor
,
27 Boston
, MA
02110-1301, USA.
*/
29 !! libgcc routines for the Renesas / SuperH SH CPUs.
30 !! Contributed by Steve Chamberlain.
33 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
34 !! recoded in assembly by Toshiyasu Morita
37 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
38 ELF local label prefixes by J"orn Rennecke
41 #include "lib1funcs.h
"
45 .global GLOBAL(ashiftrt_r4_0)
46 .global GLOBAL(ashiftrt_r4_1)
47 .global GLOBAL(ashiftrt_r4_2)
48 .global GLOBAL(ashiftrt_r4_3)
49 .global GLOBAL(ashiftrt_r4_4)
50 .global GLOBAL(ashiftrt_r4_5)
51 .global GLOBAL(ashiftrt_r4_6)
52 .global GLOBAL(ashiftrt_r4_7)
53 .global GLOBAL(ashiftrt_r4_8)
54 .global GLOBAL(ashiftrt_r4_9)
55 .global GLOBAL(ashiftrt_r4_10)
56 .global GLOBAL(ashiftrt_r4_11)
57 .global GLOBAL(ashiftrt_r4_12)
58 .global GLOBAL(ashiftrt_r4_13)
59 .global GLOBAL(ashiftrt_r4_14)
60 .global GLOBAL(ashiftrt_r4_15)
61 .global GLOBAL(ashiftrt_r4_16)
62 .global GLOBAL(ashiftrt_r4_17)
63 .global GLOBAL(ashiftrt_r4_18)
64 .global GLOBAL(ashiftrt_r4_19)
65 .global GLOBAL(ashiftrt_r4_20)
66 .global GLOBAL(ashiftrt_r4_21)
67 .global GLOBAL(ashiftrt_r4_22)
68 .global GLOBAL(ashiftrt_r4_23)
69 .global GLOBAL(ashiftrt_r4_24)
70 .global GLOBAL(ashiftrt_r4_25)
71 .global GLOBAL(ashiftrt_r4_26)
72 .global GLOBAL(ashiftrt_r4_27)
73 .global GLOBAL(ashiftrt_r4_28)
74 .global GLOBAL(ashiftrt_r4_29)
75 .global GLOBAL(ashiftrt_r4_30)
76 .global GLOBAL(ashiftrt_r4_31)
77 .global GLOBAL(ashiftrt_r4_32)
79 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
80 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
81 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
82 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
83 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
84 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
85 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
86 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
87 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
88 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
89 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
90 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
91 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
92 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
93 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
94 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
95 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
96 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
97 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
98 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
99 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
100 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
101 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
102 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
103 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
104 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
105 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
106 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
107 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
108 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
109 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
110 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
111 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
114 GLOBAL(ashiftrt_r4_32):
115 GLOBAL(ashiftrt_r4_31):
120 GLOBAL(ashiftrt_r4_30):
122 GLOBAL(ashiftrt_r4_29):
124 GLOBAL(ashiftrt_r4_28):
126 GLOBAL(ashiftrt_r4_27):
128 GLOBAL(ashiftrt_r4_26):
130 GLOBAL(ashiftrt_r4_25):
132 GLOBAL(ashiftrt_r4_24):
138 GLOBAL(ashiftrt_r4_23):
140 GLOBAL(ashiftrt_r4_22):
142 GLOBAL(ashiftrt_r4_21):
144 GLOBAL(ashiftrt_r4_20):
146 GLOBAL(ashiftrt_r4_19):
148 GLOBAL(ashiftrt_r4_18):
150 GLOBAL(ashiftrt_r4_17):
152 GLOBAL(ashiftrt_r4_16):
157 GLOBAL(ashiftrt_r4_15):
159 GLOBAL(ashiftrt_r4_14):
161 GLOBAL(ashiftrt_r4_13):
163 GLOBAL(ashiftrt_r4_12):
165 GLOBAL(ashiftrt_r4_11):
167 GLOBAL(ashiftrt_r4_10):
169 GLOBAL(ashiftrt_r4_9):
171 GLOBAL(ashiftrt_r4_8):
173 GLOBAL(ashiftrt_r4_7):
175 GLOBAL(ashiftrt_r4_6):
177 GLOBAL(ashiftrt_r4_5):
179 GLOBAL(ashiftrt_r4_4):
181 GLOBAL(ashiftrt_r4_3):
183 GLOBAL(ashiftrt_r4_2):
185 GLOBAL(ashiftrt_r4_1):
189 GLOBAL(ashiftrt_r4_0):
193 ENDFUNC(GLOBAL(ashiftrt_r4_0))
194 ENDFUNC(GLOBAL(ashiftrt_r4_1))
195 ENDFUNC(GLOBAL(ashiftrt_r4_2))
196 ENDFUNC(GLOBAL(ashiftrt_r4_3))
197 ENDFUNC(GLOBAL(ashiftrt_r4_4))
198 ENDFUNC(GLOBAL(ashiftrt_r4_5))
199 ENDFUNC(GLOBAL(ashiftrt_r4_6))
200 ENDFUNC(GLOBAL(ashiftrt_r4_7))
201 ENDFUNC(GLOBAL(ashiftrt_r4_8))
202 ENDFUNC(GLOBAL(ashiftrt_r4_9))
203 ENDFUNC(GLOBAL(ashiftrt_r4_10))
204 ENDFUNC(GLOBAL(ashiftrt_r4_11))
205 ENDFUNC(GLOBAL(ashiftrt_r4_12))
206 ENDFUNC(GLOBAL(ashiftrt_r4_13))
207 ENDFUNC(GLOBAL(ashiftrt_r4_14))
208 ENDFUNC(GLOBAL(ashiftrt_r4_15))
209 ENDFUNC(GLOBAL(ashiftrt_r4_16))
210 ENDFUNC(GLOBAL(ashiftrt_r4_17))
211 ENDFUNC(GLOBAL(ashiftrt_r4_18))
212 ENDFUNC(GLOBAL(ashiftrt_r4_19))
213 ENDFUNC(GLOBAL(ashiftrt_r4_20))
214 ENDFUNC(GLOBAL(ashiftrt_r4_21))
215 ENDFUNC(GLOBAL(ashiftrt_r4_22))
216 ENDFUNC(GLOBAL(ashiftrt_r4_23))
217 ENDFUNC(GLOBAL(ashiftrt_r4_24))
218 ENDFUNC(GLOBAL(ashiftrt_r4_25))
219 ENDFUNC(GLOBAL(ashiftrt_r4_26))
220 ENDFUNC(GLOBAL(ashiftrt_r4_27))
221 ENDFUNC(GLOBAL(ashiftrt_r4_28))
222 ENDFUNC(GLOBAL(ashiftrt_r4_29))
223 ENDFUNC(GLOBAL(ashiftrt_r4_30))
224 ENDFUNC(GLOBAL(ashiftrt_r4_31))
225 ENDFUNC(GLOBAL(ashiftrt_r4_32))
247 .global GLOBAL(ashrsi3)
248 HIDDEN_FUNC(GLOBAL(ashrsi3))
253 mova LOCAL(ashrsi3_table),r0
264 LOCAL(ashrsi3_table):
265 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
266 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
267 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
268 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
269 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
270 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
271 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
272 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
273 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
274 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
275 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
276 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
277 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
278 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
279 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
280 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
281 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
282 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
283 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
284 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
285 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
286 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
287 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
288 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
376 ENDFUNC(GLOBAL(ashrsi3))
397 .global GLOBAL(ashlsi3)
398 HIDDEN_FUNC(GLOBAL(ashlsi3))
403 mova LOCAL(ashlsi3_table),r0
414 LOCAL(ashlsi3_table):
415 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
416 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
417 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
418 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
419 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
420 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
421 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
422 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
423 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
424 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
425 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
426 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
427 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
428 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
429 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
430 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
431 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
432 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
433 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
434 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
435 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
436 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
437 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
438 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
439 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
440 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
441 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
442 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
443 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
444 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
445 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
446 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
535 ENDFUNC(GLOBAL(ashlsi3))
556 .global GLOBAL(lshrsi3)
557 HIDDEN_FUNC(GLOBAL(lshrsi3))
562 mova LOCAL(lshrsi3_table),r0
573 LOCAL(lshrsi3_table):
574 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
575 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
576 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
577 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
578 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
579 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
580 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
581 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
582 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
583 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
584 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
585 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
586 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
587 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
588 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
589 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
590 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
591 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
592 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
593 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
594 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
595 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
596 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
597 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
598 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
599 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
600 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
601 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
602 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
603 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
604 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
605 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
694 ENDFUNC(GLOBAL(lshrsi3))
700 .global GLOBAL(movmem)
701 HIDDEN_FUNC(GLOBAL(movmem))
702 HIDDEN_ALIAS(movstr,movmem)
703 /* This would be a lot simpler if r6 contained the byte count
704 minus 64, and we wouldn't be called here for a byte count of 64. */
708 bsr GLOBAL(movmemSI52+2)
711 LOCAL(movmem_loop): /* Reached with rts */
717 bt LOCAL(movmem_done)
724 bt GLOBAL(movmemSI52)
725 ! done all the large groups, do the remainder
727 mova GLOBAL(movmemSI4)+4,r0
730 LOCAL(movmem_done): ! share slot insn, works out aligned.
737 ! ??? We need aliases movstr* for movmem* for the older libraries. These
738 ! aliases will be removed at some point in the future.
739 .global GLOBAL(movmemSI64)
740 HIDDEN_FUNC(GLOBAL(movmemSI64))
741 HIDDEN_ALIAS(movstrSI64,movmemSI64)
745 .global GLOBAL(movmemSI60)
746 HIDDEN_FUNC(GLOBAL(movmemSI60))
747 HIDDEN_ALIAS(movstrSI60,movmemSI60)
751 .global GLOBAL(movmemSI56)
752 HIDDEN_FUNC(GLOBAL(movmemSI56))
753 HIDDEN_ALIAS(movstrSI56,movmemSI56)
757 .global GLOBAL(movmemSI52)
758 HIDDEN_FUNC(GLOBAL(movmemSI52))
759 HIDDEN_ALIAS(movstrSI52,movmemSI52)
763 .global GLOBAL(movmemSI48)
764 HIDDEN_FUNC(GLOBAL(movmemSI48))
765 HIDDEN_ALIAS(movstrSI48,movmemSI48)
769 .global GLOBAL(movmemSI44)
770 HIDDEN_FUNC(GLOBAL(movmemSI44))
771 HIDDEN_ALIAS(movstrSI44,movmemSI44)
775 .global GLOBAL(movmemSI40)
776 HIDDEN_FUNC(GLOBAL(movmemSI40))
777 HIDDEN_ALIAS(movstrSI40,movmemSI40)
781 .global GLOBAL(movmemSI36)
782 HIDDEN_FUNC(GLOBAL(movmemSI36))
783 HIDDEN_ALIAS(movstrSI36,movmemSI36)
787 .global GLOBAL(movmemSI32)
788 HIDDEN_FUNC(GLOBAL(movmemSI32))
789 HIDDEN_ALIAS(movstrSI32,movmemSI32)
793 .global GLOBAL(movmemSI28)
794 HIDDEN_FUNC(GLOBAL(movmemSI28))
795 HIDDEN_ALIAS(movstrSI28,movmemSI28)
799 .global GLOBAL(movmemSI24)
800 HIDDEN_FUNC(GLOBAL(movmemSI24))
801 HIDDEN_ALIAS(movstrSI24,movmemSI24)
805 .global GLOBAL(movmemSI20)
806 HIDDEN_FUNC(GLOBAL(movmemSI20))
807 HIDDEN_ALIAS(movstrSI20,movmemSI20)
811 .global GLOBAL(movmemSI16)
812 HIDDEN_FUNC(GLOBAL(movmemSI16))
813 HIDDEN_ALIAS(movstrSI16,movmemSI16)
817 .global GLOBAL(movmemSI12)
818 HIDDEN_FUNC(GLOBAL(movmemSI12))
819 HIDDEN_ALIAS(movstrSI12,movmemSI12)
823 .global GLOBAL(movmemSI8)
824 HIDDEN_FUNC(GLOBAL(movmemSI8))
825 HIDDEN_ALIAS(movstrSI8,movmemSI8)
829 .global GLOBAL(movmemSI4)
830 HIDDEN_FUNC(GLOBAL(movmemSI4))
831 HIDDEN_ALIAS(movstrSI4,movmemSI4)
837 ENDFUNC(GLOBAL(movmemSI64))
838 ENDFUNC(GLOBAL(movmemSI60))
839 ENDFUNC(GLOBAL(movmemSI56))
840 ENDFUNC(GLOBAL(movmemSI52))
841 ENDFUNC(GLOBAL(movmemSI48))
842 ENDFUNC(GLOBAL(movmemSI44))
843 ENDFUNC(GLOBAL(movmemSI40))
844 ENDFUNC(GLOBAL(movmemSI36))
845 ENDFUNC(GLOBAL(movmemSI32))
846 ENDFUNC(GLOBAL(movmemSI28))
847 ENDFUNC(GLOBAL(movmemSI24))
848 ENDFUNC(GLOBAL(movmemSI20))
849 ENDFUNC(GLOBAL(movmemSI16))
850 ENDFUNC(GLOBAL(movmemSI12))
851 ENDFUNC(GLOBAL(movmemSI8))
852 ENDFUNC(GLOBAL(movmemSI4))
853 ENDFUNC(GLOBAL(movmem))
858 .global GLOBAL(movmem_i4_even)
859 .global GLOBAL(movmem_i4_odd)
860 .global GLOBAL(movmemSI12_i4)
862 HIDDEN_FUNC(GLOBAL(movmem_i4_even))
863 HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
864 HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
866 HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
867 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
868 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
878 GLOBAL(movmem_i4_even):
880 bra L_movmem_start_even
883 GLOBAL(movmem_i4_odd):
895 bt/s L_movmem_2mod4_end
909 ENDFUNC(GLOBAL(movmem_i4_even))
910 ENDFUNC(GLOBAL(movmem_i4_odd))
913 GLOBAL(movmemSI12_i4):
922 ENDFUNC(GLOBAL(movmemSI12_i4))
928 .global GLOBAL(mulsi3)
929 HIDDEN_FUNC(GLOBAL(mulsi3))
933 ! r0 = aabb*ccdd via partial products
935 ! if aa == 0 and cc == 0
939 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
943 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
944 mov r5,r3 ! r3 = ccdd
945 swap.w r4,r2 ! r2 = bbaa
946 xtrct r2,r3 ! r3 = aacc
947 tst r3,r3 ! msws zero ?
949 rts ! yes - then we have the answer
952 hiset: sts macl,r0 ! r0 = bb*dd
953 mulu.w r2,r5 ! brewing macl = aa*dd
955 mulu.w r3,r4 ! brewing macl = cc*bb
962 ENDFUNC(GLOBAL(mulsi3))
964 #endif /* ! __SH5__ */
967 !! 4 byte integer Divide code for the Renesas SH
969 !! args in r4 and r5, result in fpul, clobber dr0, dr2
971 .global GLOBAL(sdivsi3_i4)
972 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
982 ENDFUNC(GLOBAL(sdivsi3_i4))
983 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
984 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
986 #if ! __SH5__ || __SH5__ == 32
990 .global GLOBAL(sdivsi3_i4)
991 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1006 ENDFUNC(GLOBAL(sdivsi3_i4))
1007 #endif /* ! __SH5__ || __SH5__ == 32 */
1008 #endif /* ! __SH4__ */
1012 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1014 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1016 !! Steve Chamberlain
1021 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1023 .global GLOBAL(sdivsi3)
1026 .section .text..SHmedia32,"ax"
1032 /* The assembly code that follows is a hand-optimized version of the C
1033 code that follows. Note that the registers that are modified are
1034 exactly those listed as clobbered in the patterns divsi3_i1 and
1037 int __sdivsi3 (i, j)
1040 register unsigned long long r18 asm ("r18");
1041 register unsigned long long r19 asm ("r19");
1042 register unsigned long long r0 asm ("r0") = 0;
1043 register unsigned long long r1 asm ("r1") = 1;
1044 register int r2 asm ("r2") = i >> 31;
1045 register int r3 asm ("r3") = j >> 31;
1057 r0 |= r1, r18 -= r19;
1058 while (r19 >>= 1, r1 >>= 1);
1060 return r2 * (int)r0;
1064 pt/l LOCAL(sdivsi3_dontadd), tr2
1065 pt/l LOCAL(sdivsi3_loop), tr1
1078 LOCAL(sdivsi3_loop):
1082 LOCAL(sdivsi3_dontadd):
1091 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1094 // can create absolute value without extra latency,
1095 // but dependent on proper sign extension of inputs:
1098 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1101 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1102 movi 0xffffffffffffbb0c,r19 // shift count eqiv 76
1110 // If r4 was to be used in-place instead of r21, could use this sequence
1111 // to compute absolute:
1112 // sub r63,r4,r19 // compute absolute value of r4
1113 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1114 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1126 mmacnfx.wl r25,r2,r1
1151 #else /* ! 0 && ! 0 */
1154 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1156 HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1158 FUNC(GLOBAL(sdivsi3))
1159 GLOBAL(sdivsi3): /* this is the shcompact entry point */
1160 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1161 // with the SHcompact implementation, which clobbers tr1 / tr2.
1162 .global GLOBAL(sdivsi3_1)
1164 .global GLOBAL(div_table_internal)
1165 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1166 shori GLOBAL(div_table_internal) & 65535, r20
1168 .global GLOBAL(sdivsi3_2)
1170 // clobbered: r1,r18,r19,r21,r25,tr0
1173 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
1174 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
1175 ldx.ub r20, r21, r19 // u0.8
1176 shari r25, 32, r25 // normalize to s2.30
1178 muls.l r25, r19, r19 // s2.38
1179 ldx.w r20, r21, r21 // s2.14
1181 shari r19, 24, r19 // truncate to s2.14
1182 sub r21, r19, r19 // some 11 bit inverse in s1.14
1183 muls.l r19, r19, r21 // u0.28
1186 muls.l r25, r21, r18 // s2.58
1187 shlli r19, 45, r19 // multiply by two and convert to s2.58
1190 shari r18, 28, r18 // some 22 bit inverse in s1.30
1191 muls.l r18, r25, r0 // s2.60
1192 muls.l r18, r4, r25 // s32.30
1194 shari r0, 16, r19 // s-16.44
1195 muls.l r19, r18, r19 // s-16.74
1197 shari r4, 14, r18 // s19.-14
1198 shari r19, 30, r19 // s-16.44
1199 muls.l r19, r18, r19 // s15.30
1200 xor r21, r0, r21 // You could also use the constant 1 << 27.
1207 ENDFUNC(GLOBAL(sdivsi3))
1209 ENDFUNC(GLOBAL(sdivsi3_2))
1211 #elif defined __SHMEDIA__
1212 /* m5compact-nofpu */
1213 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1215 .section .text..SHmedia32,"ax"
1217 FUNC(GLOBAL(sdivsi3))
1219 pt/l LOCAL(sdivsi3_dontsub), tr0
1220 pt/l LOCAL(sdivsi3_loop), tr1
1232 LOCAL(sdivsi3_loop):
1236 LOCAL(sdivsi3_dontsub):
1242 ENDFUNC(GLOBAL(sdivsi3))
1243 #else /* ! __SHMEDIA__ */
1244 FUNC(GLOBAL(sdivsi3))
1329 ENDFUNC(GLOBAL(sdivsi3))
1330 #endif /* ! __SHMEDIA__ */
1331 #endif /* ! __SH4__ */
1336 !! 4 byte integer Divide code for the Renesas SH
1338 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1341 .global GLOBAL(udivsi3_i4)
1342 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1373 .align 3 ! make double below 8 byte aligned.
1378 ENDFUNC(GLOBAL(udivsi3_i4))
1379 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1380 #if ! __SH5__ || __SH5__ == 32
1381 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1383 .global GLOBAL(udivsi3_i4)
1384 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1398 ENDFUNC(GLOBAL(udivsi3_i4))
1399 #endif /* ! __SH5__ || __SH5__ == 32 */
1400 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1401 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1403 .global GLOBAL(udivsi3_i4)
1404 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1433 .align 3 ! make double below 8 byte aligned.
1448 ENDFUNC(GLOBAL(udivsi3_i4))
1449 #endif /* ! __SH4__ */
1453 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1455 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1457 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1458 .global GLOBAL(udivsi3)
1459 HIDDEN_FUNC(GLOBAL(udivsi3))
1463 .section .text..SHmedia32,"ax"
1469 /* The assembly code that follows is a hand-optimized version of the C
1470 code that follows. Note that the registers that are modified are
1471 exactly those listed as clobbered in the patterns udivsi3_i1 and
1478 register unsigned long long r0 asm ("r0") = 0;
1479 register unsigned long long r18 asm ("r18") = 1;
1480 register unsigned long long r4 asm ("r4") = i;
1481 register unsigned long long r19 asm ("r19") = j;
1487 r0 |= r18, r4 -= r19;
1488 while (r19 >>= 1, r18 >>= 1);
1494 pt/l LOCAL(udivsi3_dontadd), tr2
1495 pt/l LOCAL(udivsi3_loop), tr1
1503 LOCAL(udivsi3_loop):
1507 LOCAL(udivsi3_dontadd):
1515 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1521 movi 0xffffffffffffbb0c,r20 // shift count eqiv 76
1523 mmulfx.w r21,r21,r19
1524 mshflo.w r21,r63,r21
1526 mmulfx.w r25,r19,r19
1530 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1531 before the msub.w, but we need a different value for
1532 r19 to keep errors under control. */
1534 mmulfx.w r19,r19,r19
1538 mmacnfx.wl r25,r19,r21
1563 #elif defined (__SHMEDIA__)
1564 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1565 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1566 So use a short shmedia loop. */
1567 // clobbered: r20,r21,r25,tr0,tr1,tr2
1569 .section .text..SHmedia32,"ax"
1572 pt/l LOCAL(udivsi3_dontsub), tr0
1573 pt/l LOCAL(udivsi3_loop), tr1
1578 LOCAL(udivsi3_loop):
1582 LOCAL(udivsi3_dontsub):
1587 #else /* ! defined (__SHMEDIA__) */
1591 div1 r5,r4; div1 r5,r4; div1 r5,r4
1592 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1595 div1 r5,r4; rotcl r0
1596 div1 r5,r4; rotcl r0
1597 div1 r5,r4; rotcl r0
1605 bf LOCAL(large_divisor)
1607 bf/s LOCAL(large_divisor)
1629 LOCAL(large_divisor):
1648 ENDFUNC(GLOBAL(udivsi3))
1649 #endif /* ! __SHMEDIA__ */
1650 #endif /* __SH4__ */
1651 #endif /* L_udivsi3 */
1656 .section .text..SHmedia32,"ax"
1658 .global GLOBAL(udivdi3)
1659 FUNC(GLOBAL(udivdi3))
1661 HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1666 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1670 sub r63,r22,r20 // r63 == 64 % 64
1672 pta LOCAL(large_divisor),tr0
1678 bgt/u r9,r63,tr0 // large_divisor
1687 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1688 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1689 the case may be, %0000000000000000 000.11111111111, still */
1690 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1695 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1697 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1706 mcmpgt.l r21,r63,r21 // See Note 1
1708 mshfhi.l r63,r21,r21
1712 /* small divisor: need a third divide step */
1722 /* could test r3 here to check for divide by zero. */
1725 LOCAL(large_divisor):
1734 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1735 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1736 the case may be, %0000000000000000 000.11111111111, still */
1737 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1742 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1744 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1748 pta LOCAL(no_lo_adj),tr0
1755 bgtu/u r7,r25,tr0 // no_lo_adj
1761 /* large_divisor: only needs a few adjustments. */
1768 ENDFUNC(GLOBAL(udivdi3))
1769 /* Note 1: To shift the result of the second divide stage so that the result
1770 always fits into 32 bits, yet we still reduce the rest sufficiently
1771 would require a lot of instructions to do the shifts just right. Using
1772 the full 64 bit shift result to multiply with the divisor would require
1773 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1774 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1775 know that the rest after taking this partial result into account will
1776 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1777 upper 32 bits of the partial result are nonzero. */
1778 #endif /* __SHMEDIA__ */
1779 #endif /* L_udivdi3 */
1784 .section .text..SHmedia32,"ax"
1786 .global GLOBAL(divdi3)
1787 FUNC(GLOBAL(divdi3))
1789 pta GLOBAL(udivdi3_internal),tr0
1801 ENDFUNC(GLOBAL(divdi3))
1802 #endif /* __SHMEDIA__ */
1803 #endif /* L_divdi3 */
1808 .section .text..SHmedia32,"ax"
1810 .global GLOBAL(umoddi3)
1811 FUNC(GLOBAL(umoddi3))
1813 HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1818 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1822 sub r63,r22,r20 // r63 == 64 % 64
1824 pta LOCAL(large_divisor),tr0
1830 bgt/u r9,r63,tr0 // large_divisor
1839 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1840 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1841 the case may be, %0000000000000000 000.11111111111, still */
1842 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1847 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1849 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1854 /* bubble */ /* could test r3 here to check for divide by zero. */
1857 mcmpgt.l r21,r63,r21 // See Note 1
1859 mshfhi.l r63,r21,r21
1863 /* small divisor: need a third divide step */
1866 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1876 LOCAL(large_divisor):
1885 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1886 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1887 the case may be, %0000000000000000 000.11111111111, still */
1888 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1893 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1895 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1899 pta LOCAL(no_lo_adj),tr0
1906 bgtu/u r7,r25,tr0 // no_lo_adj
1912 /* large_divisor: only needs a few adjustments. */
1921 ENDFUNC(GLOBAL(umoddi3))
1922 /* Note 1: To shift the result of the second divide stage so that the result
1923 always fits into 32 bits, yet we still reduce the rest sufficiently
1924 would require a lot of instructions to do the shifts just right. Using
1925 the full 64 bit shift result to multiply with the divisor would require
1926 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1927 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1928 know that the rest after taking this partial result into account will
1929 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1930 upper 32 bits of the partial result are nonzero. */
1931 #endif /* __SHMEDIA__ */
1932 #endif /* L_umoddi3 */
1937 .section .text..SHmedia32,"ax"
1939 .global GLOBAL(moddi3)
1940 FUNC(GLOBAL(moddi3))
1942 pta GLOBAL(umoddi3_internal),tr0
1954 ENDFUNC(GLOBAL(moddi3))
1955 #endif /* __SHMEDIA__ */
1956 #endif /* L_moddi3 */
1959 #if !defined (__SH2A_NOFPU__)
1960 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1964 .global GLOBAL(set_fpscr)
1965 HIDDEN_FUNC(GLOBAL(set_fpscr))
1970 mova LOCAL(set_fpscr_L0),r0
1971 mov.l LOCAL(set_fpscr_L0),r12
1973 mov.l LOCAL(set_fpscr_L1),r0
1977 mov.l LOCAL(set_fpscr_L1),r1
1984 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1987 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1996 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2000 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2007 LOCAL(set_fpscr_L0):
2008 .long _GLOBAL_OFFSET_TABLE_
2009 LOCAL(set_fpscr_L1):
2010 .long GLOBAL(fpscr_values@GOT)
2012 LOCAL(set_fpscr_L1):
2013 .long GLOBAL(fpscr_values)
2016 ENDFUNC(GLOBAL(set_fpscr))
2017 #ifndef NO_FPSCR_VALUES
2019 .comm GLOBAL(fpscr_values),8,4
2021 .comm GLOBAL(fpscr_values),8
2023 #endif /* NO_FPSCR_VALUES */
2024 #endif /* SH2E / SH3E / SH4 */
2025 #endif /* __SH2A_NOFPU__ */
2026 #endif /* L_set_fpscr */
2027 #ifdef L_ic_invalidate
2030 .section .text..SHmedia32,"ax"
2032 .global GLOBAL(init_trampoline)
2033 HIDDEN_FUNC(GLOBAL(init_trampoline))
2034 GLOBAL(init_trampoline):
2036 #ifdef __LITTLE_ENDIAN__
2042 movi 0xffffffffffffd002,r20
2049 ENDFUNC(GLOBAL(init_trampoline))
2050 .global GLOBAL(ic_invalidate)
2051 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2052 GLOBAL(ic_invalidate):
2059 ENDFUNC(GLOBAL(ic_invalidate))
2060 #elif defined(__SH4A__)
2061 .global GLOBAL(ic_invalidate)
2062 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2063 GLOBAL(ic_invalidate):
2068 ENDFUNC(GLOBAL(ic_invalidate))
2069 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2070 /* For system code, we use ic_invalidate_line_i, but user code
2071 needs a different mechanism. A kernel call is generally not
2072 available, and it would also be slow. Different SH4 variants use
2073 different sizes and associativities of the Icache. We use a small
2074 bit of dispatch code that can be put hidden in every shared object,
2075 which calls the actual processor-specific invalidation code in a
2077 Or if you have operating system support, the OS could mmap the
2078 processor-specific code from a single page, since it is highly
2080 .global GLOBAL(ic_invalidate)
2081 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2082 GLOBAL(ic_invalidate):
2098 0: .long GLOBAL(ic_invalidate_array)
2100 .global GLOBAL(ic_invalidate_array)
2101 /* ??? Why won't the assembler allow adding these two constants? */
2102 0: .long _GLOBAL_OFFSET_TABLE_
2103 1: .long GLOBAL(ic_invalidate_array)@GOT
2104 ENDFUNC(GLOBAL(ic_invalidate))
2105 #endif /* __pic__ */
2107 #endif /* L_ic_invalidate */
2109 #ifdef L_ic_invalidate_array
2110 #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
2111 .global GLOBAL(ic_invalidate_array)
2112 /* This is needed when an SH4 dso with trampolines is used on SH4A. */
2113 .global GLOBAL(ic_invalidate_array)
2114 FUNC(GLOBAL(ic_invalidate_array))
2115 GLOBAL(ic_invalidate_array):
2121 ENDFUNC(GLOBAL(ic_invalidate_array))
2122 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2123 .global GLOBAL(ic_invalidate_array)
2125 FUNC(GLOBAL(ic_invalidate_array))
2126 /* This must be aligned to the beginning of a cache line. */
2127 GLOBAL(ic_invalidate_array):
2130 #define WAY_SIZE 0x4000
2133 .rept WAY_SIZE * WAYS / 32
2141 .rept WAY_SIZE * WAYS / 32
2155 #else /* WAYS > 6 */
2156 /* This variant needs two different pages for mmap-ing. */
2174 ENDFUNC(GLOBAL(ic_invalidate_array))
2176 #endif /* L_ic_invalidate_array */
2178 #if defined (__SH5__) && __SH5__ == 32
2179 #ifdef L_shcompact_call_trampoline
2182 LOCAL(ct_main_table): /* One .word per slot: a handler's offset from ct_main_label. */
2183 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label) /* r2: copy from an FP register... */
2184 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label) /* ...copy from a memory address... */
2185 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label) /* ...or pop from the stack. */
2186 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label) /* r3: same three choices. */
2187 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2188 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2189 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label) /* r4: same three choices. */
2190 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2191 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2192 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label) /* r5: same three choices. */
2193 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2194 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2195 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) /* r6: high FP register, low FP register, memory address, stack. */
2196 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2197 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2198 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2199 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label) /* r7: same four choices. */
2200 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2201 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2202 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2203 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label) /* r8: same four choices. */
2204 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2205 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2206 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2207 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label) /* r9: same four choices. */
2208 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2209 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2210 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2211 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) /* Pop a sequence of registers (this slot and the next). */
2212 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2213 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) /* Pop r9 only. */
2214 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label) /* Call the function and unpack its 64-bit return value. */
2215 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) /* Just branch to the function. */
2217 .section .text..SHmedia32, "ax"
2220 /* This function loads 64-bit general-purpose registers from the
2221 stack, from a memory address contained in them or from an FP
2222 register, according to a cookie passed in r1. Its execution
2223 time is linear in the number of registers that actually have
2224 to be copied. See sh.h for details on the actual bit pattern.
2226 The function to be called is passed in r0. If a 32-bit return
2227 value is expected, the actual function will be tail-called,
2228 otherwise the return address will be stored in r10 (that the
2229 caller should expect to be clobbered) and the return value
2230 will be expanded into r2/r3 upon return. */
2232 .global GLOBAL(GCC_shcompact_call_trampoline)
2233 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2234 GLOBAL(GCC_shcompact_call_trampoline):
2235 ptabs/l r0, tr0 /* Prepare to call the actual function. */
2236 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2237 pt/l LOCAL(ct_loop), tr1
2239 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2244 LOCAL(ct_main_label):
2247 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2248 /* It must be dr0, so just do it. */
2254 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2255 /* It is either dr0 or dr2. */
2264 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2265 shlri r1, 23 - 3, r34
2266 andi r34, 3 << 3, r33
2267 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2268 LOCAL(ct_r4_fp_base):
2274 LOCAL(ct_r4_fp_copy):
2281 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2282 shlri r1, 20 - 3, r34
2283 andi r34, 3 << 3, r33
2284 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2285 LOCAL(ct_r5_fp_base):
2291 LOCAL(ct_r5_fp_copy):
2300 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2301 /* It must be dr8. */
2307 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2308 shlri r1, 16 - 3, r34
2309 andi r34, 3 << 3, r33
2310 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2311 LOCAL(ct_r6_fp_base):
2317 LOCAL(ct_r6_fp_copy):
2326 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2327 /* It is either dr8 or dr10. */
2335 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2336 shlri r1, 12 - 3, r34
2337 andi r34, 3 << 3, r33
2338 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2339 LOCAL(ct_r7_fp_base):
2344 LOCAL(ct_r7_fp_copy):
2353 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2354 /* It is either dr8 or dr10. */
2356 andi r1, 1 << 8, r32
2362 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2363 shlri r1, 8 - 3, r34
2364 andi r34, 3 << 3, r33
2365 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2366 LOCAL(ct_r8_fp_base):
2371 LOCAL(ct_r8_fp_copy):
2380 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2381 /* It is either dr8 or dr10. */
2383 andi r1, 1 << 4, r32
2389 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2390 shlri r1, 4 - 3, r34
2391 andi r34, 3 << 3, r33
2392 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2393 LOCAL(ct_r9_fp_base):
2398 LOCAL(ct_r9_fp_copy):
2407 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2408 pt/l LOCAL(ct_r2_load), tr2
2417 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2418 pt/l LOCAL(ct_r3_load), tr2
2426 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2427 pt/l LOCAL(ct_r4_load), tr2
2435 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2436 pt/l LOCAL(ct_r5_load), tr2
2444 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2445 pt/l LOCAL(ct_r6_load), tr2
2452 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2453 pt/l LOCAL(ct_r7_load), tr2
2460 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2461 pt/l LOCAL(ct_r8_load), tr2
2468 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2469 pt/l LOCAL(ct_check_tramp), tr2
2493 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2500 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2507 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2514 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2521 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2528 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2534 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2540 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2541 andi r1, 7 << 1, r30
2542 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2544 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2548 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2561 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2564 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2565 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2566 pt/u LOCAL(ct_ret_wide), tr2
2569 LOCAL(ct_call_func): /* Just branch to the function. */
2571 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2572 64-bit return value. */
2576 #if __LITTLE_ENDIAN__
2585 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2586 #endif /* L_shcompact_call_trampoline */
2588 #ifdef L_shcompact_return_trampoline
2589 /* This function does the converse of the code in `ret_wide'
2590 above. It is tail-called by SHcompact functions returning
2591 64-bit non-floating-point values, to pack the 32-bit values in
2592 r2 and r3 into r2. */
2595 .section .text..SHmedia32, "ax"
2597 .global GLOBAL(GCC_shcompact_return_trampoline)
2598 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2599 GLOBAL(GCC_shcompact_return_trampoline):
2601 #if __LITTLE_ENDIAN__
2611 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2612 #endif /* L_shcompact_return_trampoline */
2614 #ifdef L_shcompact_incoming_args
2617 LOCAL(ia_main_table): /* One .word per slot: a handler's offset from ia_main_label, or 1 for an invalid slot. */
2618 .word 1 /* Invalid, just loop */
2619 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) /* r2: store it and load its address... */
2620 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) /* ...or push it onto the stack. */
2621 .word 1 /* Invalid, just loop */
2622 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) /* r3: same two choices. */
2623 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2624 .word 1 /* Invalid, just loop */
2625 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) /* r4: same two choices. */
2626 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2627 .word 1 /* Invalid, just loop */
2628 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) /* r5: same two choices. */
2629 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2630 .word 1 /* Invalid, just loop */
2631 .word 1 /* Invalid, just loop */
2632 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) /* r6: two invalid slots, then the same two choices. */
2633 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2634 .word 1 /* Invalid, just loop */
2635 .word 1 /* Invalid, just loop */
2636 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) /* r7: as for r6. */
2637 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2638 .word 1 /* Invalid, just loop */
2639 .word 1 /* Invalid, just loop */
2640 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) /* r8: as for r6. */
2641 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2642 .word 1 /* Invalid, just loop */
2643 .word 1 /* Invalid, just loop */
2644 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) /* r9: as for r6. */
2645 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2646 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) /* Push a sequence of registers (this slot and the next). */
2647 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2648 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) /* Push r9 only. */
2649 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) /* Return (this slot and the next). */
2650 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2652 .section .text..SHmedia32, "ax"
2655 /* This function stores 64-bit general-purpose registers back in
2656 the stack, and loads the address in which each register
2657 was stored into itself. The lower 32 bits of r17 hold the address
2658 to begin storing, and the upper 32 bits of r17 hold the cookie.
2659 Its execution time is linear in the
2660 number of registers that actually have to be copied, and it is
2661 optimized for structures larger than 64 bits, as opposed to
2662 individual `long long' arguments. See sh.h for details on the
2663 actual bit pattern. */
2665 .global GLOBAL(GCC_shcompact_incoming_args)
2666 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2667 GLOBAL(GCC_shcompact_incoming_args):
2668 ptabs/l r18, tr0 /* Prepare to return. */
2669 shlri r17, 32, r0 /* Load the cookie. */
2670 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2671 pt/l LOCAL(ia_loop), tr1
2673 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
2678 LOCAL(ia_main_label):
2681 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2690 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2699 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2708 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2717 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2726 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2734 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2742 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2746 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2753 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2760 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2767 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2774 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2781 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2787 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2793 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2794 andi r0, 7 << 1, r38
2795 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2797 shori LOCAL(ia_end_of_push_seq) & 65535, r40
2801 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2814 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2816 LOCAL(ia_return): /* Return. */
2818 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2819 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2820 #endif /* L_shcompact_incoming_args */
2823 #ifdef L_nested_trampoline
2825 .section .text..SHmedia32,"ax"
2829 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2830 .global GLOBAL(GCC_nested_trampoline)
2831 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2832 GLOBAL(GCC_nested_trampoline):
2849 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2850 #endif /* L_nested_trampoline */
2851 #endif /* __SH5__ */
2853 #ifdef L_push_pop_shmedia_regs
2854 .section .text..SHmedia32,"ax"
2857 #ifndef __SH4_NOFPU__
2858 .global GLOBAL(GCC_push_shmedia_regs)
2859 FUNC(GLOBAL(GCC_push_shmedia_regs))
2860 GLOBAL(GCC_push_shmedia_regs):
2861 addi.l r15, -14*8, r15
2862 fst.d r15, 13*8, dr62
2863 fst.d r15, 12*8, dr60
2864 fst.d r15, 11*8, dr58
2865 fst.d r15, 10*8, dr56
2866 fst.d r15, 9*8, dr54
2867 fst.d r15, 8*8, dr52
2868 fst.d r15, 7*8, dr50
2869 fst.d r15, 6*8, dr48
2870 fst.d r15, 5*8, dr46
2871 fst.d r15, 4*8, dr44
2872 fst.d r15, 3*8, dr42
2873 fst.d r15, 2*8, dr40
2874 fst.d r15, 1*8, dr38
2875 fst.d r15, 0*8, dr36
2876 #else /* ! __SH4_NOFPU__ */
2877 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2878 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2879 GLOBAL(GCC_push_shmedia_regs_nofpu):
2880 #endif /* ! __SH4_NOFPU__ */
2882 addi.l r15, -27*8, r15
2914 #ifndef __SH4_NOFPU__
2915 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2917 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2919 #ifndef __SH4_NOFPU__
2920 .global GLOBAL(GCC_pop_shmedia_regs)
2921 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2922 GLOBAL(GCC_pop_shmedia_regs):
2925 fld.d r15, 40*8, dr62
2926 fld.d r15, 39*8, dr60
2927 fld.d r15, 38*8, dr58
2928 fld.d r15, 37*8, dr56
2929 fld.d r15, 36*8, dr54
2930 fld.d r15, 35*8, dr52
2931 fld.d r15, 34*8, dr50
2932 fld.d r15, 33*8, dr48
2933 fld.d r15, 32*8, dr46
2934 fld.d r15, 31*8, dr44
2935 fld.d r15, 30*8, dr42
2936 fld.d r15, 29*8, dr40
2937 fld.d r15, 28*8, dr38
2938 fld.d r15, 27*8, dr36
2940 #else /* ! __SH4_NOFPU__ */
2941 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2942 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2943 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2944 #endif /* ! __SH4_NOFPU__ */
2981 #ifndef __SH4_NOFPU__
2982 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
2984 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2986 #endif /* __SH5__ == 32 */
2987 #endif /* L_push_pop_shmedia_regs */
2991 #if defined(__pic__) && defined(__SHMEDIA__)
2992 .global GLOBAL(sdivsi3)
2993 FUNC(GLOBAL(sdivsi3))
2995 .section .text..SHmedia32,"ax"
3000 /* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3001 in a text section does not work (at least for shared libraries):
3002 the linker sets the LSB of the address as if this was SHmedia code. */
3003 #define TEXT_DATA_BUG
3007 // clobbered: r1,r18,r19,r20,r21,r25,tr0
3009 .global GLOBAL(sdivsi3)
3011 #ifdef TEXT_DATA_BUG
3012 ptb datalabel Local_div_table,tr0
3014 ptb GLOBAL(div_table_internal),tr0
3017 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3018 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3021 ldx.ub r20, r21, r19 // u0.8
3022 shari r25, 32, r25 // normalize to s2.30
3024 muls.l r25, r19, r19 // s2.38
3025 ldx.w r20, r21, r21 // s2.14
3027 shari r19, 24, r19 // truncate to s2.14
3028 sub r21, r19, r19 // some 11 bit inverse in s1.14
3029 muls.l r19, r19, r21 // u0.28
3032 muls.l r25, r21, r18 // s2.58
3033 shlli r19, 45, r19 // multiply by two and convert to s2.58
3036 shari r18, 28, r18 // some 22 bit inverse in s1.30
3037 muls.l r18, r25, r0 // s2.60
3038 muls.l r18, r4, r25 // s32.30
3040 shari r0, 16, r19 // s-16.44
3041 muls.l r19, r18, r19 // s-16.74
3043 shari r4, 14, r18 // s19.-14
3044 shari r19, 30, r19 // s-16.44
3045 muls.l r19, r18, r19 // s15.30
3046 xor r21, r0, r21 // You could also use the constant 1 << 27.
3052 ENDFUNC(GLOBAL(sdivsi3))
3053 /* This table has been generated by divtab.c .
3054 Defects for bias -330:
3055 Max defect: 6.081536e-07 at -1.000000e+00
3056 Min defect: 2.849516e-08 at 1.030651e+00
3057 Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3058 Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3059 Defect at 1: 1.238659e-07
3060 Defect at -2: 1.061708e-07 */
3061 #else /* ! __pic__ || ! __SHMEDIA__ */
3063 #endif /* __pic__ */
3064 #if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
3066 .type Local_div_table,@object
3067 .size Local_div_table,128
3068 /* negative division constants */
3085 /* negative division factors */
3105 /* positive division factors */
3122 /* positive division constants */
3140 #endif /* TEXT_DATA_BUG */
3142 .type GLOBAL(div_table),@object
3143 .size GLOBAL(div_table),128
3144 /* negative division constants */
3161 /* negative division factors */
3179 .global GLOBAL(div_table)
3181 HIDDEN_ALIAS(div_table_internal,div_table)
3183 /* positive division factors */
3200 /* positive division constants */
3218 #elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3219 /* This code uses shld, and is thus not suitable for SH1 / SH2. */
3221 /* Signed / unsigned division without use of FPU, optimized for SH4.
3222 Uses a lookup table for divisors in the range -128 .. +128, and
3223 div1 with case distinction for larger divisors in three more ranges.
3224 The code is lumped together with the table to allow the use of mova. */
3225 #ifdef __LITTLE_ENDIAN__
3236 .global GLOBAL(udivsi3_i4i)
3237 FUNC(GLOBAL(udivsi3_i4i))
3238 GLOBAL(udivsi3_i4i):
3239 mov.w LOCAL(c128_w), r1
3245 bf LOCAL(udiv_le128)
3247 bf LOCAL(udiv_ge64k)
3260 mova LOCAL(div_table_ix),r0
3261 bra LOCAL(div_le128_2)
3265 mova LOCAL(div_table_ix),r0
3269 mova LOCAL(div_table_inv),r0
3273 mova LOCAL(div_table_clz),r0
3276 bt/s LOCAL(div_by_1)
3287 LOCAL(div_by_1_neg):
3298 bra LOCAL(div_ge64k_2)
3310 mov.l LOCAL(zero_l),r1
3316 mov.w LOCAL(m256_w),r1
3318 mov.b r0,@(L_LSWMSB,r15)
3321 bra LOCAL(div_ge64k_end)
3343 rotcl r0; div1 r5,r1
3352 ENDFUNC(GLOBAL(udivsi3_i4i))
3354 .global GLOBAL(sdivsi3_i4i)
3355 FUNC(GLOBAL(sdivsi3_i4i))
3356 /* This is link-compatible with a GLOBAL(sdivsi3) call,
3357 but we effectively clobber only r1. */
3358 GLOBAL(sdivsi3_i4i):
3361 mov.w LOCAL(c128_w), r1
3362 bt/s LOCAL(pos_divisor)
3366 bt/s LOCAL(neg_result)
3375 bf/s LOCAL(div_ge64k)
3383 mov.l LOCAL(zero_l),r1
3390 mov.b r0,@(L_MSWLSB,r15)
3396 mov.b r0,@(L_LSWMSB,r15)
3397 LOCAL(div_ge64k_end):
3401 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3409 LOCAL(div_le128_neg):
3411 mova LOCAL(div_table_ix),r0
3413 mova LOCAL(div_table_inv),r0
3414 bt/s LOCAL(div_by_1_neg)
3416 mova LOCAL(div_table_clz),r0
3431 bt/s LOCAL(pos_result)
3436 bf LOCAL(div_le128_neg)
3440 bf/s LOCAL(div_ge64k_neg)
3443 mov.l LOCAL(zero_l),r1
3450 mov.b r0,@(L_MSWLSB,r15)
3456 mov.b r0,@(L_LSWMSB,r15)
3457 LOCAL(div_ge64k_neg_end):
3461 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3465 LOCAL(div_r8_neg_end):
3471 LOCAL(div_ge64k_neg):
3472 bt/s LOCAL(div_r8_neg)
3475 mov.l LOCAL(zero_l),r1
3481 mov.w LOCAL(m256_w),r1
3483 mov.b r0,@(L_LSWMSB,r15)
3486 bra LOCAL(div_ge64k_neg_end)
3499 rotcl r1; div1 r5,r0
3503 bra LOCAL(div_r8_neg_end)
3508 /* This table has been generated by divtab-sh4.c. */
3510 LOCAL(div_table_clz):
3639 /* Lookup table translating positive divisor to index into table of
3640 normalized inverse. N.B. the '0' entry is also the last entry of the
3641 previous table, and causes an unaligned access for division by zero. */
3642 LOCAL(div_table_ix):
3772 /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
3807 LOCAL(div_table_inv):
3840 /* maximum error: 0.987342 scaled: 0.921875*/
3842 ENDFUNC(GLOBAL(sdivsi3_i4i))
3843 #endif /* SH3 / SH4 */
3845 #endif /* L_div_table */
3847 #ifdef L_udiv_qrnnd_16
3849 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
3850 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
3851 /* n1 < d, but n1 might be larger than d1. */
3852 .global GLOBAL(udiv_qrnnd_16)
3854 GLOBAL(udiv_qrnnd_16):
3891 ENDFUNC(GLOBAL(udiv_qrnnd_16))
3892 #endif /* !__SHMEDIA__ */
3893 #endif /* L_udiv_qrnnd_16 */