1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
2 Free Software Foundation, Inc.
4 This file is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any
9 In addition to the permissions in the GNU General Public License, the
10 Free Software Foundation gives you unlimited permission to link the
11 compiled version of this file into combinations with other programs,
12 and to distribute those combinations without any restriction coming
13 from the use of this file. (The General Public License restrictions
14 do apply in other respects; for example, they cover modification of
15 the file, and distribution when not linked into a combine
18 This file is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; see the file COPYING. If not, write to
25 the Free Software Foundation, 59 Temple Place - Suite 330,
26 Boston, MA 02111-1307, USA. */
28 !! libgcc routines for the Renesas / SuperH SH CPUs.
29 !! Contributed by Steve Chamberlain.
32 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
33 !! recoded in assembly by Toshiyasu Morita
36 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37 ELF local label prefixes by J"orn Rennecke
/* LOCAL(X): build a file-local label by pasting X onto the `.L_' prefix. */
41 #define LOCAL(X) .L_##X
/* FUNC(X): emit a `.type' directive marking symbol X as a function. */
42 #define FUNC(X) .type X,@function
/* ENDFUNC0(X): define the end label `.Lfe_X' and emit a `.size' directive
   giving X the size `.Lfe_X - X'. */
43 #define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
/* ENDFUNC(X): extra level of indirection so that an argument such as
   GLOBAL(name) is macro-expanded before ENDFUNC0 pastes it with `##'. */
44 #define ENDFUNC(X) ENDFUNC0(X)
/* Alternate LOCAL(X) without the leading dot (`L_X').
   NOTE(review): the conditional that selects between the two LOCAL
   definitions is not visible in this view -- confirm against the full file. */
46 #define LOCAL(X) L_##X
/* CONCAT(A,B): paste two tokens together. */
51 #define CONCAT(A,B) A##B
/* GLOBAL0(U,X): paste prefix U onto `__X'; U is forwarded through CONCAT so
   it is expanded before being pasted. */
52 #define GLOBAL0(U,X) CONCAT(U,__##X)
/* GLOBAL(X): name of library symbol X, i.e. __USER_LABEL_PREFIX__ followed
   by `__X'. */
53 #define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)
/* ALIAS(X,Y): declare GLOBAL(X) as a global symbol and set it equal to
   GLOBAL(Y). */
55 #define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y)
57 #if defined __SH5__ && ! defined __SH4_NOFPU__ && ! defined (__LITTLE_ENDIAN__)
68 .global GLOBAL(ashiftrt_r4_0)
69 .global GLOBAL(ashiftrt_r4_1)
70 .global GLOBAL(ashiftrt_r4_2)
71 .global GLOBAL(ashiftrt_r4_3)
72 .global GLOBAL(ashiftrt_r4_4)
73 .global GLOBAL(ashiftrt_r4_5)
74 .global GLOBAL(ashiftrt_r4_6)
75 .global GLOBAL(ashiftrt_r4_7)
76 .global GLOBAL(ashiftrt_r4_8)
77 .global GLOBAL(ashiftrt_r4_9)
78 .global GLOBAL(ashiftrt_r4_10)
79 .global GLOBAL(ashiftrt_r4_11)
80 .global GLOBAL(ashiftrt_r4_12)
81 .global GLOBAL(ashiftrt_r4_13)
82 .global GLOBAL(ashiftrt_r4_14)
83 .global GLOBAL(ashiftrt_r4_15)
84 .global GLOBAL(ashiftrt_r4_16)
85 .global GLOBAL(ashiftrt_r4_17)
86 .global GLOBAL(ashiftrt_r4_18)
87 .global GLOBAL(ashiftrt_r4_19)
88 .global GLOBAL(ashiftrt_r4_20)
89 .global GLOBAL(ashiftrt_r4_21)
90 .global GLOBAL(ashiftrt_r4_22)
91 .global GLOBAL(ashiftrt_r4_23)
92 .global GLOBAL(ashiftrt_r4_24)
93 .global GLOBAL(ashiftrt_r4_25)
94 .global GLOBAL(ashiftrt_r4_26)
95 .global GLOBAL(ashiftrt_r4_27)
96 .global GLOBAL(ashiftrt_r4_28)
97 .global GLOBAL(ashiftrt_r4_29)
98 .global GLOBAL(ashiftrt_r4_30)
99 .global GLOBAL(ashiftrt_r4_31)
100 .global GLOBAL(ashiftrt_r4_32)
102 FUNC(GLOBAL(ashiftrt_r4_0))
103 FUNC(GLOBAL(ashiftrt_r4_1))
104 FUNC(GLOBAL(ashiftrt_r4_2))
105 FUNC(GLOBAL(ashiftrt_r4_3))
106 FUNC(GLOBAL(ashiftrt_r4_4))
107 FUNC(GLOBAL(ashiftrt_r4_5))
108 FUNC(GLOBAL(ashiftrt_r4_6))
109 FUNC(GLOBAL(ashiftrt_r4_7))
110 FUNC(GLOBAL(ashiftrt_r4_8))
111 FUNC(GLOBAL(ashiftrt_r4_9))
112 FUNC(GLOBAL(ashiftrt_r4_10))
113 FUNC(GLOBAL(ashiftrt_r4_11))
114 FUNC(GLOBAL(ashiftrt_r4_12))
115 FUNC(GLOBAL(ashiftrt_r4_13))
116 FUNC(GLOBAL(ashiftrt_r4_14))
117 FUNC(GLOBAL(ashiftrt_r4_15))
118 FUNC(GLOBAL(ashiftrt_r4_16))
119 FUNC(GLOBAL(ashiftrt_r4_17))
120 FUNC(GLOBAL(ashiftrt_r4_18))
121 FUNC(GLOBAL(ashiftrt_r4_19))
122 FUNC(GLOBAL(ashiftrt_r4_20))
123 FUNC(GLOBAL(ashiftrt_r4_21))
124 FUNC(GLOBAL(ashiftrt_r4_22))
125 FUNC(GLOBAL(ashiftrt_r4_23))
126 FUNC(GLOBAL(ashiftrt_r4_24))
127 FUNC(GLOBAL(ashiftrt_r4_25))
128 FUNC(GLOBAL(ashiftrt_r4_26))
129 FUNC(GLOBAL(ashiftrt_r4_27))
130 FUNC(GLOBAL(ashiftrt_r4_28))
131 FUNC(GLOBAL(ashiftrt_r4_29))
132 FUNC(GLOBAL(ashiftrt_r4_30))
133 FUNC(GLOBAL(ashiftrt_r4_31))
134 FUNC(GLOBAL(ashiftrt_r4_32))
137 GLOBAL(ashiftrt_r4_32):
138 GLOBAL(ashiftrt_r4_31):
143 GLOBAL(ashiftrt_r4_30):
145 GLOBAL(ashiftrt_r4_29):
147 GLOBAL(ashiftrt_r4_28):
149 GLOBAL(ashiftrt_r4_27):
151 GLOBAL(ashiftrt_r4_26):
153 GLOBAL(ashiftrt_r4_25):
155 GLOBAL(ashiftrt_r4_24):
161 GLOBAL(ashiftrt_r4_23):
163 GLOBAL(ashiftrt_r4_22):
165 GLOBAL(ashiftrt_r4_21):
167 GLOBAL(ashiftrt_r4_20):
169 GLOBAL(ashiftrt_r4_19):
171 GLOBAL(ashiftrt_r4_18):
173 GLOBAL(ashiftrt_r4_17):
175 GLOBAL(ashiftrt_r4_16):
180 GLOBAL(ashiftrt_r4_15):
182 GLOBAL(ashiftrt_r4_14):
184 GLOBAL(ashiftrt_r4_13):
186 GLOBAL(ashiftrt_r4_12):
188 GLOBAL(ashiftrt_r4_11):
190 GLOBAL(ashiftrt_r4_10):
192 GLOBAL(ashiftrt_r4_9):
194 GLOBAL(ashiftrt_r4_8):
196 GLOBAL(ashiftrt_r4_7):
198 GLOBAL(ashiftrt_r4_6):
200 GLOBAL(ashiftrt_r4_5):
202 GLOBAL(ashiftrt_r4_4):
204 GLOBAL(ashiftrt_r4_3):
206 GLOBAL(ashiftrt_r4_2):
208 GLOBAL(ashiftrt_r4_1):
212 GLOBAL(ashiftrt_r4_0):
216 ENDFUNC(GLOBAL(ashiftrt_r4_0))
217 ENDFUNC(GLOBAL(ashiftrt_r4_1))
218 ENDFUNC(GLOBAL(ashiftrt_r4_2))
219 ENDFUNC(GLOBAL(ashiftrt_r4_3))
220 ENDFUNC(GLOBAL(ashiftrt_r4_4))
221 ENDFUNC(GLOBAL(ashiftrt_r4_5))
222 ENDFUNC(GLOBAL(ashiftrt_r4_6))
223 ENDFUNC(GLOBAL(ashiftrt_r4_7))
224 ENDFUNC(GLOBAL(ashiftrt_r4_8))
225 ENDFUNC(GLOBAL(ashiftrt_r4_9))
226 ENDFUNC(GLOBAL(ashiftrt_r4_10))
227 ENDFUNC(GLOBAL(ashiftrt_r4_11))
228 ENDFUNC(GLOBAL(ashiftrt_r4_12))
229 ENDFUNC(GLOBAL(ashiftrt_r4_13))
230 ENDFUNC(GLOBAL(ashiftrt_r4_14))
231 ENDFUNC(GLOBAL(ashiftrt_r4_15))
232 ENDFUNC(GLOBAL(ashiftrt_r4_16))
233 ENDFUNC(GLOBAL(ashiftrt_r4_17))
234 ENDFUNC(GLOBAL(ashiftrt_r4_18))
235 ENDFUNC(GLOBAL(ashiftrt_r4_19))
236 ENDFUNC(GLOBAL(ashiftrt_r4_20))
237 ENDFUNC(GLOBAL(ashiftrt_r4_21))
238 ENDFUNC(GLOBAL(ashiftrt_r4_22))
239 ENDFUNC(GLOBAL(ashiftrt_r4_23))
240 ENDFUNC(GLOBAL(ashiftrt_r4_24))
241 ENDFUNC(GLOBAL(ashiftrt_r4_25))
242 ENDFUNC(GLOBAL(ashiftrt_r4_26))
243 ENDFUNC(GLOBAL(ashiftrt_r4_27))
244 ENDFUNC(GLOBAL(ashiftrt_r4_28))
245 ENDFUNC(GLOBAL(ashiftrt_r4_29))
246 ENDFUNC(GLOBAL(ashiftrt_r4_30))
247 ENDFUNC(GLOBAL(ashiftrt_r4_31))
248 ENDFUNC(GLOBAL(ashiftrt_r4_32))
270 .global GLOBAL(ashrsi3)
271 FUNC(GLOBAL(ashrsi3))
276 mova LOCAL(ashrsi3_table),r0
287 LOCAL(ashrsi3_table):
288 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
300 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
301 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
302 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
303 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
304 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
305 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
306 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
307 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
308 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
309 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
310 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
311 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
312 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
313 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
314 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
315 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
316 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
317 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
318 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
319 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
399 ENDFUNC(GLOBAL(ashrsi3))
420 .global GLOBAL(ashlsi3)
421 FUNC(GLOBAL(ashlsi3))
426 mova LOCAL(ashlsi3_table),r0
437 LOCAL(ashlsi3_table):
438 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
439 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
440 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
441 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
442 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
443 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
444 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
445 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
446 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
447 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
448 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
449 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
450 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
451 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
452 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
453 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
454 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
455 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
456 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
457 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
458 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
459 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
460 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
461 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
462 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
463 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
464 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
465 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
466 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
467 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
468 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
469 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
558 ENDFUNC(GLOBAL(ashlsi3))
579 .global GLOBAL(lshrsi3)
580 FUNC(GLOBAL(lshrsi3))
585 mova LOCAL(lshrsi3_table),r0
596 LOCAL(lshrsi3_table):
597 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
598 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
599 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
600 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
601 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
602 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
603 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
604 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
605 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
606 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
607 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
608 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
609 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
610 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
611 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
612 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
613 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
614 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
615 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
616 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
617 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
618 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
619 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
620 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
621 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
622 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
623 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
624 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
625 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
626 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
627 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
628 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
717 ENDFUNC(GLOBAL(lshrsi3))
722 ! done all the large groups, do the remainder
727 mova GLOBAL(movmemSI0),r0
733 ! ??? We need aliases movstr* for movmem* for the older libraries. These
734 ! aliases will be removed at some point in the future.
735 .global GLOBAL(movmemSI64)
736 FUNC(GLOBAL(movmemSI64))
737 ALIAS(movstrSI64,movmemSI64)
741 .global GLOBAL(movmemSI60)
742 FUNC(GLOBAL(movmemSI60))
743 ALIAS(movstrSI60,movmemSI60)
747 .global GLOBAL(movmemSI56)
748 FUNC(GLOBAL(movmemSI56))
749 ALIAS(movstrSI56,movmemSI56)
753 .global GLOBAL(movmemSI52)
754 FUNC(GLOBAL(movmemSI52))
755 ALIAS(movstrSI52,movmemSI52)
759 .global GLOBAL(movmemSI48)
760 FUNC(GLOBAL(movmemSI48))
761 ALIAS(movstrSI48,movmemSI48)
765 .global GLOBAL(movmemSI44)
766 FUNC(GLOBAL(movmemSI44))
767 ALIAS(movstrSI44,movmemSI44)
771 .global GLOBAL(movmemSI40)
772 FUNC(GLOBAL(movmemSI40))
773 ALIAS(movstrSI40,movmemSI40)
777 .global GLOBAL(movmemSI36)
778 FUNC(GLOBAL(movmemSI36))
779 ALIAS(movstrSI36,movmemSI36)
783 .global GLOBAL(movmemSI32)
784 FUNC(GLOBAL(movmemSI32))
785 ALIAS(movstrSI32,movmemSI32)
789 .global GLOBAL(movmemSI28)
790 FUNC(GLOBAL(movmemSI28))
791 ALIAS(movstrSI28,movmemSI28)
795 .global GLOBAL(movmemSI24)
796 FUNC(GLOBAL(movmemSI24))
797 ALIAS(movstrSI24,movmemSI24)
801 .global GLOBAL(movmemSI20)
802 FUNC(GLOBAL(movmemSI20))
803 ALIAS(movstrSI20,movmemSI20)
807 .global GLOBAL(movmemSI16)
808 FUNC(GLOBAL(movmemSI16))
809 ALIAS(movstrSI16,movmemSI16)
813 .global GLOBAL(movmemSI12)
814 FUNC(GLOBAL(movmemSI12))
815 ALIAS(movstrSI12,movmemSI12)
819 .global GLOBAL(movmemSI8)
820 FUNC(GLOBAL(movmemSI8))
821 ALIAS(movstrSI8,movmemSI8)
825 .global GLOBAL(movmemSI4)
826 FUNC(GLOBAL(movmemSI4))
827 ALIAS(movstrSI4,movmemSI4)
831 .global GLOBAL(movmemSI0)
832 FUNC(GLOBAL(movmemSI0))
833 ALIAS(movstrSI0,movmemSI0)
838 ENDFUNC(GLOBAL(movmemSI64))
839 ENDFUNC(GLOBAL(movmemSI60))
840 ENDFUNC(GLOBAL(movmemSI56))
841 ENDFUNC(GLOBAL(movmemSI52))
842 ENDFUNC(GLOBAL(movmemSI48))
843 ENDFUNC(GLOBAL(movmemSI44))
844 ENDFUNC(GLOBAL(movmemSI40))
845 ENDFUNC(GLOBAL(movmemSI36))
846 ENDFUNC(GLOBAL(movmemSI32))
847 ENDFUNC(GLOBAL(movmemSI28))
848 ENDFUNC(GLOBAL(movmemSI24))
849 ENDFUNC(GLOBAL(movmemSI20))
850 ENDFUNC(GLOBAL(movmemSI16))
851 ENDFUNC(GLOBAL(movmemSI12))
852 ENDFUNC(GLOBAL(movmemSI8))
853 ENDFUNC(GLOBAL(movmemSI4))
854 ENDFUNC(GLOBAL(movmemSI0))
858 .global GLOBAL(movmem)
923 .global GLOBAL(movmem_i4_even)
924 .global GLOBAL(movmem_i4_odd)
925 .global GLOBAL(movmemSI12_i4)
927 FUNC(GLOBAL(movmem_i4_even))
928 FUNC(GLOBAL(movmem_i4_odd))
929 FUNC(GLOBAL(movmemSI12_i4))
931 ALIAS(movstr_i4_even,movmem_i4_even)
932 ALIAS(movstr_i4_odd,movmem_i4_odd)
933 ALIAS(movstrSI12_i4,movmemSI12_i4)
943 GLOBAL(movmem_i4_even):
945 bra L_movmem_start_even
948 GLOBAL(movmem_i4_odd):
960 bt/s L_movmem_2mod4_end
974 ENDFUNC(GLOBAL(movmem_i4_even))
975 ENDFUNC(GLOBAL(movmem_i4_odd))
978 GLOBAL(movmemSI12_i4):
987 ENDFUNC(GLOBAL(movmemSI12_i4))
993 .global GLOBAL(mulsi3)
998 ! r0 = aabb*ccdd via partial products
1000 ! if aa == 0 and cc == 0
1004 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
1008 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
1009 mov r5,r3 ! r3 = ccdd
1010 swap.w r4,r2 ! r2 = bbaa
1011 xtrct r2,r3 ! r3 = aacc
1012 tst r3,r3 ! msws zero ?
1014 rts ! yes - then we have the answer
1017 hiset: sts macl,r0 ! r0 = bb*dd
1018 mulu.w r2,r5 ! brewing macl = aa*dd
1020 mulu.w r3,r4 ! brewing macl = cc*bb
1027 FUNC(GLOBAL(mulsi3))
1029 #endif /* ! __SH5__ */
1032 !! 4 byte integer Divide code for the Renesas SH
1034 !! args in r4 and r5, result in fpul, clobber dr0, dr2
1036 .global GLOBAL(sdivsi3_i4)
1037 FUNC(GLOBAL(sdivsi3_i4))
1047 ENDFUNC(GLOBAL(sdivsi3_i4))
1048 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1049 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
1051 #if ! __SH5__ || __SH5__ == 32
1055 .global GLOBAL(sdivsi3_i4)
1056 FUNC(GLOBAL(sdivsi3_i4))
1071 ENDFUNC(GLOBAL(sdivsi3_i4))
1072 #endif /* ! __SH5__ || __SH5__ == 32 */
1073 #endif /* ! __SH4__ */
1077 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1079 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1081 !! Steve Chamberlain
1086 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1088 .global GLOBAL(sdivsi3)
1089 FUNC(GLOBAL(sdivsi3))
1092 .section .text..SHmedia32,"ax"
1098 /* The assembly code that follows is a hand-optimized version of the C
1099 code that follows. Note that the registers that are modified are
1100 exactly those listed as clobbered in the patterns divsi3_i1 and
1103 int __sdivsi3 (i, j)
1106 register unsigned long long r18 asm ("r18");
1107 register unsigned long long r19 asm ("r19");
1108 register unsigned long long r0 asm ("r0") = 0;
1109 register unsigned long long r1 asm ("r1") = 1;
1110 register int r2 asm ("r2") = i >> 31;
1111 register int r3 asm ("r3") = j >> 31;
1123 r0 |= r1, r18 -= r19;
1124 while (r19 >>= 1, r1 >>= 1);
1126 return r2 * (int)r0;
1130 pt/l LOCAL(sdivsi3_dontadd), tr2
1131 pt/l LOCAL(sdivsi3_loop), tr1
1144 LOCAL(sdivsi3_loop):
1148 LOCAL(sdivsi3_dontadd):
1157 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1160 // can create absolute value without extra latency,
1161 // but dependent on proper sign extension of inputs:
1164 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1167 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1168 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1176 // If r4 was to be used in-place instead of r21, could use this sequence
1177 // to compute absolute:
1178 // sub r63,r4,r19 // compute absolute value of r4
1179 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1180 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1192 mmacnfx.wl r25,r2,r1
1218 #elif defined __SHMEDIA__
1219 /* m5compact-nofpu */
1220 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1222 .section .text..SHmedia32,"ax"
1225 pt/l LOCAL(sdivsi3_dontsub), tr0
1226 pt/l LOCAL(sdivsi3_loop), tr1
1238 LOCAL(sdivsi3_loop):
1242 LOCAL(sdivsi3_dontsub):
1248 #else /* ! __SHMEDIA__ */
1333 ENDFUNC(GLOBAL(sdivsi3))
1334 #endif /* ! __SHMEDIA__ */
1335 #endif /* ! __SH4__ */
1340 !! 4 byte integer Divide code for the Renesas SH
1342 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1345 .global GLOBAL(udivsi3_i4)
1346 FUNC(GLOBAL(udivsi3_i4))
1358 #ifdef __LITTLE_ENDIAN__
1382 .align 3 ! make double below 8 byte aligned.
1387 ENDFUNC(GLOBAL(udivsi3_i4))
1388 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1389 #if ! __SH5__ || __SH5__ == 32
1390 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1392 .global GLOBAL(udivsi3_i4)
1393 FUNC(GLOBAL(udivsi3_i4))
1407 ENDFUNC(GLOBAL(udivsi3_i4))
1408 #endif /* ! __SH5__ || __SH5__ == 32 */
1409 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1410 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1412 .global GLOBAL(udivsi3_i4)
1426 #ifdef __LITTLE_ENDIAN__
1446 .align 3 ! make double below 8 byte aligned.
1461 ENDFUNC(GLOBAL(udivsi3_i4))
1462 #endif /* ! __SH4__ */
1466 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1468 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1470 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1471 .global GLOBAL(udivsi3)
1472 FUNC(GLOBAL(udivsi3))
1476 .section .text..SHmedia32,"ax"
1482 /* The assembly code that follows is a hand-optimized version of the C
1483 code that follows. Note that the registers that are modified are
1484 exactly those listed as clobbered in the patterns udivsi3_i1 and
1491 register unsigned long long r0 asm ("r0") = 0;
1492 register unsigned long long r18 asm ("r18") = 1;
1493 register unsigned long long r4 asm ("r4") = i;
1494 register unsigned long long r19 asm ("r19") = j;
1500 r0 |= r18, r4 -= r19;
1501 while (r19 >>= 1, r18 >>= 1);
1507 pt/l LOCAL(udivsi3_dontadd), tr2
1508 pt/l LOCAL(udivsi3_loop), tr1
1516 LOCAL(udivsi3_loop):
1520 LOCAL(udivsi3_dontadd):
1528 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1534 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1536 mmulfx.w r21,r21,r19
1537 mshflo.w r21,r63,r21
1539 mmulfx.w r25,r19,r19
1543 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1544 before the msub.w, but we need a different value for
1545 r19 to keep errors under control. */
1547 mmulfx.w r19,r19,r19
1551 mmacnfx.wl r25,r19,r21
1576 #elif defined (__SHMEDIA__)
1577 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1578 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1579 So use a short shmedia loop. */
1580 // clobbered: r20,r21,r25,tr0,tr1,tr2
1582 .section .text..SHmedia32,"ax"
1585 pt/l LOCAL(udivsi3_dontsub), tr0
1586 pt/l LOCAL(udivsi3_loop), tr1
1591 LOCAL(udivsi3_loop):
1595 LOCAL(udivsi3_dontsub):
1600 #else /* ! defined (__SHMEDIA__) */
1604 div1 r5,r4; div1 r5,r4; div1 r5,r4
1605 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1608 div1 r5,r4; rotcl r0
1609 div1 r5,r4; rotcl r0
1610 div1 r5,r4; rotcl r0
1618 bf LOCAL(large_divisor)
1620 bf/s LOCAL(large_divisor)
1642 LOCAL(large_divisor):
1661 ENDFUNC(GLOBAL(udivsi3))
1662 #endif /* ! __SHMEDIA__ */
1663 #endif /* __SH4__ */
1664 #endif /* L_udivsi3 */
1669 .section .text..SHmedia32,"ax"
1671 .global GLOBAL(udivdi3)
1672 FUNC(GLOBAL(udivdi3))
1678 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1682 sub r63,r22,r20 // r63 == 64 % 64
1684 pta LOCAL(large_divisor),tr0
1690 bgt/u r9,r63,tr0 // large_divisor
1699 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1700 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1701 the case may be, %0000000000000000 000.11111111111, still */
1702 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1707 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1709 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1718 mcmpgt.l r21,r63,r21 // See Note 1
1720 mshfhi.l r63,r21,r21
1724 /* small divisor: need a third divide step */
1734 /* could test r3 here to check for divide by zero. */
1737 LOCAL(large_divisor):
1746 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1747 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1748 the case may be, %0000000000000000 000.11111111111, still */
1749 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1754 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1756 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1760 pta LOCAL(no_lo_adj),tr0
1767 bgtu/u r7,r25,tr0 // no_lo_adj
1773 /* large_divisor: only needs a few adjustments. */
1780 ENDFUNC(GLOBAL(udivdi3))
1781 /* Note 1: To shift the result of the second divide stage so that the result
1782 always fits into 32 bits, yet we still reduce the rest sufficiently
1783 would require a lot of instructions to do the shifts just right. Using
1784 the full 64 bit shift result to multiply with the divisor would require
1785 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1786 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1787 know that the rest after taking this partial result into account will
1788 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1789 upper 32 bits of the partial result are nonzero. */
1790 #endif /* __SHMEDIA__ */
1791 #endif /* L_udivdi3 */
1796 .section .text..SHmedia32,"ax"
1798 .global GLOBAL(divdi3)
1799 FUNC(GLOBAL(divdi3))
1801 pta GLOBAL(udivdi3),tr0
1813 ENDFUNC(GLOBAL(divdi3))
1814 #endif /* __SHMEDIA__ */
1815 #endif /* L_divdi3 */
1820 .section .text..SHmedia32,"ax"
1822 .global GLOBAL(umoddi3)
1823 FUNC(GLOBAL(umoddi3))
1829 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1833 sub r63,r22,r20 // r63 == 64 % 64
1835 pta LOCAL(large_divisor),tr0
1841 bgt/u r9,r63,tr0 // large_divisor
1850 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1851 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1852 the case may be, %0000000000000000 000.11111111111, still */
1853 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1858 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1860 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1865 /* bubble */ /* could test r3 here to check for divide by zero. */
1868 mcmpgt.l r21,r63,r21 // See Note 1
1870 mshfhi.l r63,r21,r21
1874 /* small divisor: need a third divide step */
1877 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1887 LOCAL(large_divisor):
1896 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1897 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1898 the case may be, %0000000000000000 000.11111111111, still */
1899 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1904 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1906 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1910 pta LOCAL(no_lo_adj),tr0
1917 bgtu/u r7,r25,tr0 // no_lo_adj
1923 /* large_divisor: only needs a few adjustments. */
1932 ENDFUNC(GLOBAL(umoddi3))
1933 /* Note 1: To shift the result of the second divide stage so that the result
1934 always fits into 32 bits, yet we still reduce the rest sufficiently
1935 would require a lot of instructions to do the shifts just right. Using
1936 the full 64 bit shift result to multiply with the divisor would require
1937 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1938 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1939 know that the rest after taking this partial result into account will
1940 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1941 upper 32 bits of the partial result are nonzero. */
1942 #endif /* __SHMEDIA__ */
1943 #endif /* L_umoddi3 */
1948 .section .text..SHmedia32,"ax"
1950 .global GLOBAL(moddi3)
1951 FUNC(GLOBAL(moddi3))
1953 pta GLOBAL(umoddi3),tr0
1965 ENDFUNC(GLOBAL(moddi3))
1966 #endif /* __SHMEDIA__ */
1967 #endif /* L_moddi3 */
1970 #if !defined (__SH2A_NOFPU__)
1971 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1975 .global GLOBAL(set_fpscr)
1976 FUNC(GLOBAL(set_fpscr))
1981 mova LOCAL(set_fpscr_L0),r0
1982 mov.l LOCAL(set_fpscr_L0),r12
1984 mov.l LOCAL(set_fpscr_L1),r0
1988 mov.l LOCAL(set_fpscr_L1),r1
1995 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1998 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2007 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2011 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2018 LOCAL(set_fpscr_L0):
2019 .long _GLOBAL_OFFSET_TABLE_
2020 LOCAL(set_fpscr_L1):
2021 .long GLOBAL(fpscr_values@GOT)
2023 LOCAL(set_fpscr_L1):
2024 .long GLOBAL(fpscr_values)
2027 ENDFUNC(GLOBAL(set_fpscr))
2028 #ifndef NO_FPSCR_VALUES
2030 .comm GLOBAL(fpscr_values),8,4
2032 .comm GLOBAL(fpscr_values),8
2034 #endif /* NO_FPSCR_VALUES */
2035 #endif /* SH2E / SH3E / SH4 */
2036 #endif /* __SH2A_NOFPU__ */
2037 #endif /* L_set_fpscr */
2038 #ifdef L_ic_invalidate
2041 .section .text..SHmedia32,"ax"
2043 .global GLOBAL(init_trampoline)
2044 FUNC(GLOBAL(init_trampoline))
2045 GLOBAL(init_trampoline):
2047 #ifdef __LITTLE_ENDIAN__
2053 movi 0xffffffffffffd002,r20
2060 .global GLOBAL(ic_invalidate)
2061 FUNC(GLOBAL(ic_invalidate))
2062 GLOBAL(ic_invalidate):
2070 ENDFUNC(GLOBAL(ic_invalidate))
2071 ENDFUNC(GLOBAL(init_trampoline))
2072 #elif defined(__SH4A__)
2073 .global GLOBAL(ic_invalidate)
2074 FUNC(GLOBAL(ic_invalidate))
2075 GLOBAL(ic_invalidate):
2080 ENDFUNC(GLOBAL(ic_invalidate))
2081 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
2082 /* This assumes a direct-mapped cache, which is the case for
2083 the first SH4, but not for the second version of SH4, that
2084 uses a 2-way set-associative cache, nor SH4a, that is 4-way.
2085 SH4a fortunately offers an instruction to invalidate the
2086 instruction cache, and we use it above, but SH4 doesn't.
2087 However, since the libraries don't contain any nested
2088 functions (the only case in which GCC would emit this pattern)
2089 and we actually emit the ic_invalidate_line_i pattern for
2090 cache invalidation on all SH4 multilibs (even 4-nofpu, that
2091 isn't even covered here), and pre-SH4 cores don't have
2092 caches, it seems like this code is pointless, unless it's
2093 meant for backward binary compatibility or for userland-only
2094 cache invalidation for say sh4-*-linux-gnu. Such a feature
2095 should probably be moved into a system call, such that the
2096 kernel could do whatever it takes to invalidate a cache line
2097 on the core it's actually running on. I.e., this hideous :-)
2098 piece of code should go away at some point. */
2100 .global GLOBAL(ic_invalidate)
2101 FUNC(GLOBAL(ic_invalidate))
2102 GLOBAL(ic_invalidate):
2106 /* Compute how many cache lines 0f is away from r4. */
2109 /* Prepare to branch to 0f plus the cache-line offset. */
2116 /* This must be aligned to the beginning of a cache line. */
2118 .rept 256 /* There are 256 cache lines of 32 bytes. */
2125 ENDFUNC(GLOBAL(ic_invalidate))
2127 #endif /* L_ic_invalidate */
2129 #if defined (__SH5__) && __SH5__ == 32
2130 #ifdef L_shcompact_call_trampoline
2133 LOCAL(ct_main_table):
2134 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2135 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2136 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2137 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2138 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2139 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2140 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2141 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2142 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2143 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2144 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2145 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2146 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2147 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2148 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2149 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2150 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2151 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2152 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2153 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2154 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2155 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2156 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2157 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2158 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2159 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2160 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2161 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2162 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2163 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2164 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2165 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2166 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2168 .section .text..SHmedia32, "ax"
2171 /* This function loads 64-bit general-purpose registers from the
2172 stack, from a memory address contained in them or from an FP
2173 register, according to a cookie passed in r1. Its execution
2174 time is linear in the number of registers that actually have
2175 to be copied. See sh.h for details on the actual bit pattern.
2177 The function to be called is passed in r0. If a 32-bit return
2178 value is expected, the actual function will be tail-called,
2179 otherwise the return address will be stored in r10 (that the
2180 caller should expect to be clobbered) and the return value
2181 will be expanded into r2/r3 upon return. */
2183 .global GLOBAL(GCC_shcompact_call_trampoline)
2184 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2185 GLOBAL(GCC_shcompact_call_trampoline):
2186 ptabs/l r0, tr0 /* Prepare to call the actual function. */
2187 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2188 pt/l LOCAL(ct_loop), tr1
2190 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2195 LOCAL(ct_main_label):
2198 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2199 /* It must be dr0, so just do it. */
2205 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2206 /* It is either dr0 or dr2. */
2215 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2216 shlri r1, 23 - 3, r34
2217 andi r34, 3 << 3, r33
2218 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2219 LOCAL(ct_r4_fp_base):
2225 LOCAL(ct_r4_fp_copy):
2232 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2233 shlri r1, 20 - 3, r34
2234 andi r34, 3 << 3, r33
2235 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2236 LOCAL(ct_r5_fp_base):
2242 LOCAL(ct_r5_fp_copy):
2251 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2252 /* It must be dr8. */
2258 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2259 shlri r1, 16 - 3, r34
2260 andi r34, 3 << 3, r33
2261 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2262 LOCAL(ct_r6_fp_base):
2268 LOCAL(ct_r6_fp_copy):
2277 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2278 /* It is either dr8 or dr10. */
2286 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2287 shlri r1, 12 - 3, r34
2288 andi r34, 3 << 3, r33
2289 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2290 LOCAL(ct_r7_fp_base):
2295 LOCAL(ct_r7_fp_copy):
2304 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2305 /* It is either dr8 or dr10. */
2307 andi r1, 1 << 8, r32
2313 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2314 shlri r1, 8 - 3, r34
2315 andi r34, 3 << 3, r33
2316 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2317 LOCAL(ct_r8_fp_base):
2322 LOCAL(ct_r8_fp_copy):
2331 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2332 /* It is either dr8 or dr10. */
2334 andi r1, 1 << 4, r32
2340 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2341 shlri r1, 4 - 3, r34
2342 andi r34, 3 << 3, r33
2343 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2344 LOCAL(ct_r9_fp_base):
2349 LOCAL(ct_r9_fp_copy):
2358 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2359 pt/l LOCAL(ct_r2_load), tr2
2368 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2369 pt/l LOCAL(ct_r3_load), tr2
2377 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2378 pt/l LOCAL(ct_r4_load), tr2
2386 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2387 pt/l LOCAL(ct_r5_load), tr2
2395 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2396 pt/l LOCAL(ct_r6_load), tr2
2403 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2404 pt/l LOCAL(ct_r7_load), tr2
2411 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2412 pt/l LOCAL(ct_r8_load), tr2
2419 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2420 pt/l LOCAL(ct_check_tramp), tr2
2444 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2451 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2458 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2465 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2472 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2479 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2485 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2491 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2492 andi r1, 7 << 1, r30
2493 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2495 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2499 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2512 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2515 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2516 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2517 pt/u LOCAL(ct_ret_wide), tr2
2520 LOCAL(ct_call_func): /* Just branch to the function. */
2522 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2523 64-bit return value. */
2527 #if __LITTLE_ENDIAN__
2536 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2537 #endif /* L_shcompact_call_trampoline */
2539 #ifdef L_shcompact_return_trampoline
2540 /* This function does the converse of the code in `ret_wide'
2541 above. It is tail-called by SHcompact functions returning
2542 64-bit non-floating-point values, to pack the 32-bit values in
2543 r2 and r3 into r2. */
2546 .section .text..SHmedia32, "ax"
/* Export the entry point.  FUNC and ENDFUNC bracket it; with the first
   set of definitions at the top of this file they expand to .type and
   .size directives for the symbol.  */
2548 .global GLOBAL(GCC_shcompact_return_trampoline)
2549 FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2550 GLOBAL(GCC_shcompact_return_trampoline):
/* NOTE(review): the way r2 and r3 are combined is presumably what
   differs between the two endiannesses selected below.  */
2552 #if __LITTLE_ENDIAN__
2562 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2563 #endif /* L_shcompact_return_trampoline */
2565 #ifdef L_shcompact_incoming_args
/* Dispatch table for GCC_shcompact_incoming_args, one .word per slot.
   A slot holds either the offset of a handler from ia_main_label or
   the placeholder 1 for a slot marked invalid.  r2..r5 have three
   slots each (invalid, ld, push); r6..r9 have four each (invalid,
   invalid, ld, push); the five trailing slots select ia_push_seq
   (twice), ia_r9_push and ia_return (twice).  */
2568 LOCAL(ia_main_table):
2569 .word 1 /* Invalid, just loop */
2570 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2571 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2572 .word 1 /* Invalid, just loop */
2573 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2574 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2575 .word 1 /* Invalid, just loop */
2576 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2577 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2578 .word 1 /* Invalid, just loop */
2579 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2580 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2581 .word 1 /* Invalid, just loop */
2582 .word 1 /* Invalid, just loop */
2583 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2584 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2585 .word 1 /* Invalid, just loop */
2586 .word 1 /* Invalid, just loop */
2587 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2588 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2589 .word 1 /* Invalid, just loop */
2590 .word 1 /* Invalid, just loop */
2591 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2592 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2593 .word 1 /* Invalid, just loop */
2594 .word 1 /* Invalid, just loop */
2595 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2596 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2597 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2598 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2599 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2600 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2601 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2603 .section .text..SHmedia32, "ax"
2606 /* This function stores 64-bit general-purpose registers back in
2607 the stack, and loads the address in which each register
2608 was stored into itself. The lower 32 bits of r17 hold the address
2609 to begin storing, and the upper 32 bits of r17 hold the cookie.
2610 Its execution time is linear on the
2611 number of registers that actually have to be copied, and it is
2612 optimized for structures larger than 64 bits, as opposed to
2613 individual `long long' arguments. See sh.h for details on the
2614 actual bit pattern. */
/* Register roles that can be read off the code below:
     r17 - on entry, store address (low half) and cookie (high half).
     r18 - the address to return to (transferred to tr0).
     r0  - the cookie, i.e. r17 shifted right by 32.
     r43 - address of ia_main_table minus 31*2.
     tr1 - ia_loop.  */
2616 .global GLOBAL(GCC_shcompact_incoming_args)
2617 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2618 GLOBAL(GCC_shcompact_incoming_args):
2619 ptabs/l r18, tr0 /* Prepare to return. */
2620 shlri r17, 32, r0 /* Load the cookie. */
/* Build the address of ia_main_table minus 31*2 in r43: movi supplies
   bits 16..31 of the value and the shori below supplies bits 0..15.
   NOTE(review): the -31*2 bias presumably matches the way the table
   index is derived from the cookie; confirm against the ia_loop code.  */
2621 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
/* tr1 = ia_loop.  */
2622 pt/l LOCAL(ia_loop), tr1
2624 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
/* Every handler entry of ia_main_table is stored as an offset from
   this label.  */
2629 LOCAL(ia_main_label):
2632 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2641 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2650 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2659 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2668 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2677 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2685 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2693 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2697 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2704 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2711 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2718 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2725 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2732 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2738 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2744 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
/* r38 = cookie bits 1..3, left in place (not shifted down).  */
2745 andi r0, 7 << 1, r38
/* r40 = address of ia_end_of_push_seq, assembled from its upper and
   lower 16-bit halves by the movi/shori pair.  */
2746 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2748 shori LOCAL(ia_end_of_push_seq) & 65535, r40
2752 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2765 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2767 LOCAL(ia_return): /* Return. */
2769 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2770 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2771 #endif /* L_shcompact_incoming_args */
2774 #ifdef L_nested_trampoline
2776 .section .text..SHmedia32,"ax"
/* The alignment requested here goes with the 8-byte copy unit named in
   the comment on the .align line.  NOTE(review): this assumes .align 3
   means 2^3 = 8 bytes for this target; confirm against the assembler
   documentation.  */
2780 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
/* Export the entry point; FUNC and ENDFUNC bracket the symbol.  */
2781 .global GLOBAL(GCC_nested_trampoline)
2782 FUNC(GLOBAL(GCC_nested_trampoline))
2783 GLOBAL(GCC_nested_trampoline):
2800 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2801 #endif /* L_nested_trampoline */
2802 #endif /* __SH5__ */
2804 #ifdef L_push_pop_shmedia_regs
2805 .section .text..SHmedia32,"ax"
/* GCC_push_shmedia_regs is only assembled when __SH4_NOFPU__ is not
   defined.  It lowers r15 by 14*8 bytes and stores the fourteen
   even-numbered double registers dr36..dr62 into that area, dr36 at
   offset 0*8 up to dr62 at offset 13*8.  It is followed directly by
   GCC_push_shmedia_regs_nofpu.  */
2808 #ifndef __SH4_NOFPU__
2809 .global GLOBAL(GCC_push_shmedia_regs)
2810 FUNC(GLOBAL(GCC_push_shmedia_regs))
2811 GLOBAL(GCC_push_shmedia_regs):
2812 addi.l r15, -14*8, r15
2813 fst.d r15, 13*8, dr62
2814 fst.d r15, 12*8, dr60
2815 fst.d r15, 11*8, dr58
2816 fst.d r15, 10*8, dr56
2817 fst.d r15, 9*8, dr54
2818 fst.d r15, 8*8, dr52
2819 fst.d r15, 7*8, dr50
2820 fst.d r15, 6*8, dr48
2821 fst.d r15, 5*8, dr46
2822 fst.d r15, 4*8, dr44
2823 fst.d r15, 3*8, dr42
2824 fst.d r15, 2*8, dr40
2825 fst.d r15, 1*8, dr38
2826 fst.d r15, 0*8, dr36
/* Entry point that skips the FP stores above.  It lowers r15 by a
   further 27*8 bytes, so when reached from GCC_push_shmedia_regs the
   FP values end up at offsets 27*8..40*8 from the new r15.  */
2828 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2829 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2830 GLOBAL(GCC_push_shmedia_regs_nofpu):
2832 addi.l r15, -27*8, r15
2865 #ifndef __SH4_NOFPU__
2866 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2868 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
/* GCC_pop_shmedia_regs is only assembled when __SH4_NOFPU__ is not
   defined.  It reloads the even-numbered double registers dr36..dr62
   from offsets 27*8..40*8 relative to r15.  Those offsets mirror
   GCC_push_shmedia_regs, which stores the same registers at
   0*8..13*8 before r15 is lowered by a further 27*8.  */
2869 #ifndef __SH4_NOFPU__
2870 .global GLOBAL(GCC_pop_shmedia_regs)
2871 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2872 GLOBAL(GCC_pop_shmedia_regs):
2875 fld.d r15, 40*8, dr62
2876 fld.d r15, 39*8, dr60
2877 fld.d r15, 38*8, dr58
2878 fld.d r15, 37*8, dr56
2879 fld.d r15, 36*8, dr54
2880 fld.d r15, 35*8, dr52
2881 fld.d r15, 34*8, dr50
2882 fld.d r15, 33*8, dr48
2883 fld.d r15, 32*8, dr46
2884 fld.d r15, 31*8, dr44
2885 fld.d r15, 30*8, dr42
2886 fld.d r15, 29*8, dr40
2887 fld.d r15, 28*8, dr38
2888 fld.d r15, 27*8, dr36
/* Entry point that skips the FP reloads above.  */
2891 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2892 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2893 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2930 #ifndef __SH4_NOFPU__
2931 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
2933 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2934 #endif /* __SH5__ == 32 */
2935 #endif /* L_push_pop_shmedia_regs */