1 /* Copyright
(C
) 1994, 1995, 1997, 1998, 1999, 2000, 2001
2 Free Software Foundation
, Inc.
4 This file is free software
; you can redistribute it and/or modify it
5 under the terms of the GNU General
Public License as published by the
6 Free Software Foundation
; either version 2, or (at your option) any
9 In addition to the permissions
in the GNU General
Public License
, the
10 Free Software Foundation gives you unlimited permission to link the
11 compiled version of
this file
into combinations with other programs
,
12 and to distribute those combinations without any restriction coming
13 from the use of
this file.
(The General
Public License restrictions
14 do apply
in other respects
; for example, they cover modification of
15 the file
, and distribution when
not linked
into a combine
18 This file is distributed
in the hope that it will be useful
, but
19 WITHOUT ANY WARRANTY
; without even the implied warranty of
20 MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General
Public License for more details.
23 You should have received a copy of the GNU General
Public License
24 along with
this program
; see the file COPYING. If not, write to
25 the Free Software Foundation
, 59 Temple Place
- Suite
330,
26 Boston
, MA
02111-1307, USA.
*/
28 !! libgcc routines for the Hitachi / SuperH SH CPUs.
29 !! Contributed by Steve Chamberlain.
32 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
33 !! recoded in assembly by Toshiyasu Morita
36 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37 ELF local label prefixes by J"orn Rennecke
/* LOCAL(X) builds an assembler-local label name from X.  Two alternative
   spellings are given (".L_" prefix and "L_" prefix); the preprocessor
   conditional that picks one is not visible in this excerpt.
   NOTE(review): presumably the ".L_" form is the ELF one -- confirm.  */
41 #define LOCAL(X) .L_##X
43 #define LOCAL(X) L_##X
/* GLOBAL(X) builds an exported symbol name from X, with either two or
   three leading underscores; again the selecting conditional is not
   visible here.  */
47 #define GLOBAL(X) __##X
51 #define GLOBAL(X) ___##X
54 #if defined __SH5__ && ! defined __SH4_NOFPU__
/* Export the entry points ashiftrt_r4_0 .. ashiftrt_r4_32 (the
   "ashiftrt_r4_x" routines named in the file header): 33 symbols, one
   .global per shift count.  */
60 .global GLOBAL(ashiftrt_r4_0)
61 .global GLOBAL(ashiftrt_r4_1)
62 .global GLOBAL(ashiftrt_r4_2)
63 .global GLOBAL(ashiftrt_r4_3)
64 .global GLOBAL(ashiftrt_r4_4)
65 .global GLOBAL(ashiftrt_r4_5)
66 .global GLOBAL(ashiftrt_r4_6)
67 .global GLOBAL(ashiftrt_r4_7)
68 .global GLOBAL(ashiftrt_r4_8)
69 .global GLOBAL(ashiftrt_r4_9)
70 .global GLOBAL(ashiftrt_r4_10)
71 .global GLOBAL(ashiftrt_r4_11)
72 .global GLOBAL(ashiftrt_r4_12)
73 .global GLOBAL(ashiftrt_r4_13)
74 .global GLOBAL(ashiftrt_r4_14)
75 .global GLOBAL(ashiftrt_r4_15)
76 .global GLOBAL(ashiftrt_r4_16)
77 .global GLOBAL(ashiftrt_r4_17)
78 .global GLOBAL(ashiftrt_r4_18)
79 .global GLOBAL(ashiftrt_r4_19)
80 .global GLOBAL(ashiftrt_r4_20)
81 .global GLOBAL(ashiftrt_r4_21)
82 .global GLOBAL(ashiftrt_r4_22)
83 .global GLOBAL(ashiftrt_r4_23)
84 .global GLOBAL(ashiftrt_r4_24)
85 .global GLOBAL(ashiftrt_r4_25)
86 .global GLOBAL(ashiftrt_r4_26)
87 .global GLOBAL(ashiftrt_r4_27)
88 .global GLOBAL(ashiftrt_r4_28)
89 .global GLOBAL(ashiftrt_r4_29)
90 .global GLOBAL(ashiftrt_r4_30)
91 .global GLOBAL(ashiftrt_r4_31)
92 .global GLOBAL(ashiftrt_r4_32)
/* Entry labels, laid out from the largest shift count down to 0.  The
   instructions between the labels are not part of this excerpt.
   NOTE(review): presumably an entry shares the code of the entries that
   follow it -- verify against the full source before reordering.  */
95 GLOBAL(ashiftrt_r4_32):
96 GLOBAL(ashiftrt_r4_31):
101 GLOBAL(ashiftrt_r4_30):
103 GLOBAL(ashiftrt_r4_29):
105 GLOBAL(ashiftrt_r4_28):
107 GLOBAL(ashiftrt_r4_27):
109 GLOBAL(ashiftrt_r4_26):
111 GLOBAL(ashiftrt_r4_25):
113 GLOBAL(ashiftrt_r4_24):
119 GLOBAL(ashiftrt_r4_23):
121 GLOBAL(ashiftrt_r4_22):
123 GLOBAL(ashiftrt_r4_21):
125 GLOBAL(ashiftrt_r4_20):
127 GLOBAL(ashiftrt_r4_19):
129 GLOBAL(ashiftrt_r4_18):
131 GLOBAL(ashiftrt_r4_17):
133 GLOBAL(ashiftrt_r4_16):
138 GLOBAL(ashiftrt_r4_15):
140 GLOBAL(ashiftrt_r4_14):
142 GLOBAL(ashiftrt_r4_13):
144 GLOBAL(ashiftrt_r4_12):
146 GLOBAL(ashiftrt_r4_11):
148 GLOBAL(ashiftrt_r4_10):
150 GLOBAL(ashiftrt_r4_9):
152 GLOBAL(ashiftrt_r4_8):
154 GLOBAL(ashiftrt_r4_7):
156 GLOBAL(ashiftrt_r4_6):
158 GLOBAL(ashiftrt_r4_5):
160 GLOBAL(ashiftrt_r4_4):
162 GLOBAL(ashiftrt_r4_3):
164 GLOBAL(ashiftrt_r4_2):
166 GLOBAL(ashiftrt_r4_1):
170 GLOBAL(ashiftrt_r4_0):
/* ashrsi3: exported shift routine that dispatches through a byte table
   with one entry per shift count 0..31.  */
194 .global GLOBAL(ashrsi3)
/* r0 = address of the dispatch table below.  */
199 mova LOCAL(ashrsi3_table),r0
/* One byte per shift count: the distance from the table start to the
   handler label for that count.  Every distance has to fit in a single
   byte, so the handlers must stay close to the table.  */
210 LOCAL(ashrsi3_table):
211 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
212 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
213 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
214 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
215 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
216 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
217 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
218 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
219 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
220 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
221 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
222 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
223 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
224 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
225 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
226 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
227 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
228 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
229 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
230 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
231 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
232 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
233 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
234 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
235 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
236 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
237 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
238 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
239 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
240 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
241 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
242 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
/* ashlsi3: exported shift routine that dispatches through a byte table
   with one entry per shift count 0..31.  */
342 .global GLOBAL(ashlsi3)
/* r0 = address of the dispatch table below.  */
347 mova LOCAL(ashlsi3_table),r0
/* One byte per shift count: the distance from the table start to the
   handler label for that count.  Every distance has to fit in a single
   byte, so the handlers must stay close to the table.  */
358 LOCAL(ashlsi3_table):
359 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
360 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
361 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
362 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
363 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
364 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
365 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
366 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
367 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
368 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
369 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
370 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
371 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
372 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
373 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
374 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
375 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
376 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
377 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
378 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
379 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
380 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
381 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
382 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
383 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
384 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
385 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
386 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
387 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
388 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
389 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
390 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
/* lshrsi3: exported shift routine that dispatches through a byte table
   with one entry per shift count 0..31.  */
499 .global GLOBAL(lshrsi3)
/* r0 = address of the dispatch table below.  */
504 mova LOCAL(lshrsi3_table),r0
/* One byte per shift count: the distance from the table start to the
   handler label for that count.  Every distance has to fit in a single
   byte, so the handlers must stay close to the table.  */
515 LOCAL(lshrsi3_table):
516 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
517 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
518 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
519 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
520 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
521 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
522 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
523 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
524 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
525 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
526 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
527 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
528 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
529 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
530 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
531 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
532 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
533 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
534 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
535 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
536 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
537 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
538 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
539 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
540 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
541 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
542 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
543 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
544 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
545 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
546 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
547 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
640 ! done all the large groups, do the remainder
/* r0 = address of the movstrSI0 label.  The definition of movstrSI0 and
   the code that consumes r0 are not visible in this excerpt.  */
645 mova GLOBAL(movstrSI0),r0
/* Exported movstrSI<n> entry points for n = 64, 60, ..., 4.
   NOTE(review): n is presumably the number of bytes copied -- confirm
   against the full source.  */
651 .global GLOBAL(movstrSI64)
655 .global GLOBAL(movstrSI60)
659 .global GLOBAL(movstrSI56)
663 .global GLOBAL(movstrSI52)
667 .global GLOBAL(movstrSI48)
671 .global GLOBAL(movstrSI44)
675 .global GLOBAL(movstrSI40)
679 .global GLOBAL(movstrSI36)
683 .global GLOBAL(movstrSI32)
687 .global GLOBAL(movstrSI28)
691 .global GLOBAL(movstrSI24)
695 .global GLOBAL(movstrSI20)
699 .global GLOBAL(movstrSI16)
703 .global GLOBAL(movstrSI12)
707 .global GLOBAL(movstrSI8)
711 .global GLOBAL(movstrSI4)
/* Exported movstr entry point (body not visible here).  */
721 .global GLOBAL(movstr)
/* Exported "_i4" entry points.  */
782 .global GLOBAL(movstr_i4_even)
783 .global GLOBAL(movstr_i4_odd)
784 .global GLOBAL(movstrSI12_i4)
794 GLOBAL(movstr_i4_odd):
/* NOTE(review): this branch target and the one used by movstr_i4_even
   are spelled with a literal "L_" prefix instead of going through
   LOCAL(), so they do not pick up the ".L_" spelling when that variant
   of LOCAL() is selected.  The label definitions are outside this
   excerpt; change the references and the definitions together or not at
   all.  */
806 bt/s L_movstr_2mod4_end
820 GLOBAL(movstr_i4_even):
822 bra L_movstr_start_even
826 GLOBAL(movstrSI12_i4):
839 .global GLOBAL(mulsi3)
843 ! r0 = aabb*ccdd via partial products
845 ! if aa == 0 and cc == 0
849 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
/* Fast path: start the low product bb*dd, then gather both upper
   halves (aa and cc) into r3 and test them; when both are zero the low
   product alone is the answer.  */
853 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
854 mov r5,r3 ! r3 = ccdd
855 swap.w r4,r2 ! r2 = bbaa
856 xtrct r2,r3 ! r3 = aacc
857 tst r3,r3 ! msws zero ?
859 rts ! yes - then we have the answer
/* Slow path: pick up bb*dd from macl and compute the two cross
   products aa*dd and cc*bb.  mulu.w multiplies only the low 16 bits of
   each operand, which is why r2 (bbaa) and r3 (aacc) are used here.  */
862 hiset: sts macl,r0 ! r0 = bb*dd
863 mulu.w r2,r5 ! brewing macl = aa*dd
865 mulu.w r3,r4 ! brewing macl = cc*bb
874 #endif /* ! __SH5__ */
877 !! 4 byte integer Divide code for the Hitachi SH
879 !! args in r4 and r5, result in fpul, clobber dr0, dr2
881 .global GLOBAL(sdivsi3_i4)
891 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
892 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
894 #if ! __SH5__ || __SH5__ == 32
898 .global GLOBAL(sdivsi3_i4)
913 #endif /* ! __SH5__ || __SH5__ == 32 */
914 #endif /* ! __SH4__ */
918 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
920 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
927 !! args in r4 and r5, result in r0 clobber r1,r2,r3
929 .global GLOBAL(sdivsi3)
932 .section .text..SHmedia32,"ax"
937 /* The assembly code that follows is a hand-optimized version of the C
938 code that follows. Note that the registers that are modified are
939 exactly those listed as clobbered in the patterns divsi3_i1 and
945 register unsigned long long r18 asm ("r18");
946 register unsigned long long r19 asm ("r19");
947 register unsigned long long r0 asm ("r0") = 0;
948 register unsigned long long r1 asm ("r1") = 1;
949 register int r2 asm ("r2") = i >> 31;
950 register int r3 asm ("r3") = j >> 31;
962 r0 |= r1, r18 -= r19;
963 while (r19 >>= 1, r1 >>= 1);
969 pt/l LOCAL(sdivsi3_dontadd), tr2
970 pt/l LOCAL(sdivsi3_loop), tr1
987 LOCAL(sdivsi3_dontadd):
1079 #endif /* ! __SHMEDIA__ */
1080 #endif /* ! __SH4__ */
1085 !! 4 byte integer Divide code for the Hitachi SH
1087 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1089 .global GLOBAL(udivsi3_i4)
1101 #ifdef __LITTLE_ENDIAN__
1125 .align 3 ! make double below 8 byte aligned.
1130 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1131 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1133 #if ! __SH5__ || __SH5__ == 32
1137 .global GLOBAL(udivsi3_i4)
1151 #ifdef __LITTLE_ENDIAN__
1171 .align 3 ! make double below 8 byte aligned.
1186 #endif /* ! __SH5__ || __SH5__ == 32 */
1187 #endif /* ! __SH4__ */
1191 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1193 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1195 !! Steve Chamberlain
1200 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1201 .global GLOBAL(udivsi3)
1205 .section .text..SHmedia32,"ax"
1210 /* The assembly code that follows is a hand-optimized version of the C
1211 code that follows. Note that the registers that are modified are
1212 exactly those listed as clobbered in the patterns udivsi3_i1 and
1219 register unsigned long long r0 asm ("r0") = 0;
1220 register unsigned long long r18 asm ("r18") = 1;
1221 register unsigned long long r4 asm ("r4") = i;
1222 register unsigned long long r19 asm ("r19") = j;
1228 r0 |= r18, r4 -= r19;
1229 while (r19 >>= 1, r18 >>= 1);
1235 pt/l LOCAL(udivsi3_dontadd), tr2
1236 pt/l LOCAL(udivsi3_loop), tr1
1244 LOCAL(udivsi3_loop):
1248 LOCAL(udivsi3_dontadd):
1258 ! get one bit from the msb of the numerator into the T
1259 ! bit and divide it by whats in r5. Put the answer bit
1260 ! into the T bit so it can come out again at the bottom
/* 32 rotcl/div1 steps follow, written out as four groups of eight; each
   step handles one numerator bit as described above.  The listing
   numbers skip between the groups, so lines (possibly labels) may sit
   there that are not visible in this excerpt -- do not assume the
   sequence is only ever entered from the top.  */
1262 rotcl r4 ; div1 r5,r0
1263 rotcl r4 ; div1 r5,r0
1264 rotcl r4 ; div1 r5,r0
1265 rotcl r4 ; div1 r5,r0
1266 rotcl r4 ; div1 r5,r0
1267 rotcl r4 ; div1 r5,r0
1268 rotcl r4 ; div1 r5,r0
1269 rotcl r4 ; div1 r5,r0
1271 rotcl r4 ; div1 r5,r0
1272 rotcl r4 ; div1 r5,r0
1273 rotcl r4 ; div1 r5,r0
1274 rotcl r4 ; div1 r5,r0
1275 rotcl r4 ; div1 r5,r0
1276 rotcl r4 ; div1 r5,r0
1277 rotcl r4 ; div1 r5,r0
1278 rotcl r4 ; div1 r5,r0
1280 rotcl r4 ; div1 r5,r0
1281 rotcl r4 ; div1 r5,r0
1282 rotcl r4 ; div1 r5,r0
1283 rotcl r4 ; div1 r5,r0
1284 rotcl r4 ; div1 r5,r0
1285 rotcl r4 ; div1 r5,r0
1286 rotcl r4 ; div1 r5,r0
1287 rotcl r4 ; div1 r5,r0
1290 rotcl r4 ; div1 r5,r0
1291 rotcl r4 ; div1 r5,r0
1292 rotcl r4 ; div1 r5,r0
1293 rotcl r4 ; div1 r5,r0
1294 rotcl r4 ; div1 r5,r0
1295 rotcl r4 ; div1 r5,r0
1296 rotcl r4 ; div1 r5,r0
1297 rotcl r4 ; div1 r5,r0
1302 #endif /* ! __SHMEDIA__ */
1303 #endif /* __SH4__ */
1306 #if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
/* set_fpscr: exported routine that works on the fpscr_values data
   declared at the end of this block.  Most of its body is not visible
   in this excerpt.  */
1310 .global GLOBAL(set_fpscr)
/* r1 = address of fpscr_values, fetched from the literal stored at
   set_fpscr_L1 below.  */
1313 mov.l LOCAL(set_fpscr_L1),r1
1319 #if defined(__SH4__)
1322 #else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1331 #if defined(__SH4__)
1335 #else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
/* Literal: the address of fpscr_values.  */
1341 LOCAL(set_fpscr_L1):
1342 .long GLOBAL(fpscr_values)
/* fpscr_values is an 8-byte common symbol.  Two spellings of the
   directive are given, with and without an explicit third argument; the
   conditional that selects one is not visible in this excerpt.  */
1344 .comm GLOBAL(fpscr_values),8,4
1346 .comm GLOBAL(fpscr_values),8
1348 #endif /* SH3E / SH4 */
1349 #endif /* L_set_fpscr */
1350 #ifdef L_ic_invalidate
/* First variant of ic_invalidate, emitted into the .text..SHmedia32
   section.  */
1353 .section .text..SHmedia32,"ax"
1355 .global GLOBAL(ic_invalidate)
1356 GLOBAL(ic_invalidate):
1361 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
/* Second variant, same exported name, selected by the #elif above.  */
1362 .global GLOBAL(ic_invalidate)
1363 GLOBAL(ic_invalidate):
1367 /* Compute how many cache lines 0f is away from r4. */
1370 /* Prepare to branch to 0f plus the cache-line offset. */
1377 /* This must be aligned to the beginning of a cache line. */
/* 256 lines * 32 bytes = 8192 bytes covered by the repeated block.  */
1379 .rept 256 /* There are 256 cache lines of 32 bytes. */
1386 #endif /* L_ic_invalidate */
1388 #if defined (__SH5__) && __SH5__ == 32
1389 #ifdef L_shcompact_call_trampoline
/* Dispatch table used by GCC_shcompact_call_trampoline: 33 16-bit
   entries, each holding the distance from ct_main_label to one handler.
   The trampoline forms its table base as ct_main_table - 31 * 2.
   ct_pop_seq appears twice and ct_r9_pop appears a second time near the
   end of the table.  */
1392 LOCAL(ct_main_table):
1393 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
1394 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
1395 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
1396 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
1397 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
1398 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
1399 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
1400 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
1401 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
1402 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
1403 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
1404 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
1405 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
1406 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
1407 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
1408 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
1409 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
1410 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
1411 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
1412 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
1413 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
1414 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
1415 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
1416 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
1417 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
1418 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
1419 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
1420 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
1421 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
1422 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
1423 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
1424 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
1425 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
1427 .section .text..SHmedia32, "ax"
1430 /* This function loads 64-bit general-purpose registers from the
1431 stack, from a memory address contained in them or from an FP
1432 register, according to a cookie passed in r1. Its execution
1433 time is linear on the number of registers that actually have
1434 to be copied. See sh.h for details on the actual bit pattern.
1436 The function to be called is passed in r0. If a 32-bit return
1437 value is expected, the actual function will be tail-called,
1438 otherwise the return address will be stored in r10 (that the
1439 caller should expect to be clobbered) and the return value
1440 will be expanded into r2/r3 upon return. */
1442 .global GLOBAL(GCC_shcompact_call_trampoline)
1443 GLOBAL(GCC_shcompact_call_trampoline):
1444 ptabs/l r0, tr0 /* Prepare to call the actual function. */
/* With the callee address now held in tr0, r0 is reused to build the
   constant ct_main_table - 31 * 2: movi supplies bits 16..31 and the
   shori below supplies bits 0..15.  */
1445 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
/* tr1 = address of the ct_loop label.  */
1446 pt/l LOCAL(ct_loop), tr1
1448 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
/* Reference point for every offset stored in ct_main_table.  */
1453 LOCAL(ct_main_label):
/* Handlers that copy an argument register from an FP register.
   The r4..r9 "fp"/"fpl" handlers share one idiom:
     shlri r1, K - 3, r34   -- shift the cookie in r1 right by K - 3
     andi  r34, 3 << 3, r33 -- keep a 2-bit field, i.e. 8 * ((r1 >> K) & 3)
     addi  r33, copy - base, r32
   so r32 = (distance from the _base label to the _copy label) plus
   8 * field.  K is 23, 20, 16, 12, 8 and 4 for r4 .. r9 respectively.
   NOTE(review): the scaling by 8 presumably matches the size of each
   alternative at the _copy label -- those instructions are not visible
   in this excerpt.  */
1456 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
1457 /* It must be dr0, so just do it. */
1463 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
1464 /* It is either dr0 or dr2. */
1473 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
1474 shlri r1, 23 - 3, r34
1475 andi r34, 3 << 3, r33
1476 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
1477 LOCAL(ct_r4_fp_base):
1483 LOCAL(ct_r4_fp_copy):
1490 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
1491 shlri r1, 20 - 3, r34
1492 andi r34, 3 << 3, r33
1493 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
1494 LOCAL(ct_r5_fp_base):
1500 LOCAL(ct_r5_fp_copy):
1509 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
1510 /* It must be dr8. */
1516 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
1517 shlri r1, 16 - 3, r34
1518 andi r34, 3 << 3, r33
1519 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
1520 LOCAL(ct_r6_fp_base):
1526 LOCAL(ct_r6_fp_copy):
1535 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
1536 /* It is either dr8 or dr10. */
1544 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
1545 shlri r1, 12 - 3, r34
1546 andi r34, 3 << 3, r33
1547 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
1548 LOCAL(ct_r7_fp_base):
1553 LOCAL(ct_r7_fp_copy):
1562 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
1563 /* It is either dr8 or dr10. */
/* r32 = bit 8 of the cookie, left in place (zero or 1 << 8).  */
1565 andi r1, 1 << 8, r32
1571 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
1572 shlri r1, 8 - 3, r34
1573 andi r34, 3 << 3, r33
1574 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
1575 LOCAL(ct_r8_fp_base):
1580 LOCAL(ct_r8_fp_copy):
1589 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
1590 /* It is either dr8 or dr10. */
/* r32 = bit 4 of the cookie, left in place (zero or 1 << 4).  */
1592 andi r1, 1 << 4, r32
1598 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
1599 shlri r1, 4 - 3, r34
1600 andi r34, 3 << 3, r33
1601 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
1602 LOCAL(ct_r9_fp_base):
1607 LOCAL(ct_r9_fp_copy):
/* Memory-load handlers, one per argument register r2..r9.  Each one
   starts by pointing tr2 at a continuation label: ct_rN_load for
   r2..r8, whereas the r9 handler targets ct_check_tramp.
   NOTE(review): presumably r9 differs because it is the last register
   handled -- confirm against the full source.  */
1616 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
1617 pt/l LOCAL(ct_r2_load), tr2
1626 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
1627 pt/l LOCAL(ct_r3_load), tr2
1635 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
1636 pt/l LOCAL(ct_r4_load), tr2
1644 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
1645 pt/l LOCAL(ct_r5_load), tr2
1653 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
1654 pt/l LOCAL(ct_r6_load), tr2
1661 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
1662 pt/l LOCAL(ct_r7_load), tr2
1669 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
1670 pt/l LOCAL(ct_r8_load), tr2
1677 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
1678 pt/l LOCAL(ct_check_tramp), tr2
/* Stack-pop handlers for r2..r9, followed by the tail of the
   trampoline (ct_check_tramp, ct_call_func, ct_ret_wide).  */
1702 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
1709 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
1716 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
1723 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
1730 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
1737 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
1743 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
1749 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
/* r30 = cookie bits 1..3, left in place (a 3-bit field times 2).  */
1750 andi r1, 7 << 1, r30
/* r32 = address of ct_end_of_pop_seq, assembled from its upper 16 bits
   (movi) and lower 16 bits (shori).  */
1751 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
1753 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
1757 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
1770 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
1773 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
1774 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
/* tr2 = address of ct_ret_wide.  */
1775 pt/u LOCAL(ct_ret_wide), tr2
1778 LOCAL(ct_call_func): /* Just branch to the function. */
1780 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
1781 64-bit return value. */
1785 #if __LITTLE_ENDIAN__
1793 #endif /* L_shcompact_call_trampoline */
1795 #ifdef L_shcompact_return_trampoline
1796 /* This function does the converse of the code in `ret_wide'
1797 above. It is tail-called by SHcompact functions returning
1798 64-bit non-floating-point values, to pack the 32-bit values in
1799 r2 and r3 into r2. */
1802 .section .text..SHmedia32, "ax"
1804 .global GLOBAL(GCC_shcompact_return_trampoline)
1805 GLOBAL(GCC_shcompact_return_trampoline):
1807 #if __LITTLE_ENDIAN__
1816 #endif /* L_shcompact_return_trampoline */
1818 #ifdef L_shcompact_incoming_args
/* Dispatch table used by GCC_shcompact_incoming_args: 33 16-bit
   entries.  A valid entry holds the distance from ia_main_label to a
   handler; the entries written as a literal 1 are the ones marked
   invalid.  The routine forms its table base as
   ia_main_table - 31 * 2.  */
1821 LOCAL(ia_main_table):
1822 .word 1 /* Invalid, just loop */
1823 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
1824 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
1825 .word 1 /* Invalid, just loop */
1826 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
1827 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
1828 .word 1 /* Invalid, just loop */
1829 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
1830 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
1831 .word 1 /* Invalid, just loop */
1832 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
1833 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
1834 .word 1 /* Invalid, just loop */
1835 .word 1 /* Invalid, just loop */
1836 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
1837 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
1838 .word 1 /* Invalid, just loop */
1839 .word 1 /* Invalid, just loop */
1840 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
1841 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
1842 .word 1 /* Invalid, just loop */
1843 .word 1 /* Invalid, just loop */
1844 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
1845 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
1846 .word 1 /* Invalid, just loop */
1847 .word 1 /* Invalid, just loop */
1848 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
1849 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
1850 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
1851 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
1852 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
1853 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
1854 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
1856 .section .text..SHmedia32, "ax"
1859 /* This function stores 64-bit general-purpose registers back in
1860 the stack, starting at @(r1), where the cookie is supposed to
1861 have been stored, and loads the address in which each register
1862 was stored into itself. Its execution time is linear on the
1863 number of registers that actually have to be copied, and it is
1864 optimized for structures larger than 64 bits, as opposed to
1865 individual `long long' arguments. See sh.h for details on the
1866 actual bit pattern. */
1868 .global GLOBAL(GCC_shcompact_incoming_args)
1869 GLOBAL(GCC_shcompact_incoming_args):
1870 ptabs/l r18, tr0 /* Prepare to return. */
/* The cookie is taken from the upper 32 bits of r17.  */
1871 shlri r17, 32, r0 /* Load the cookie. */
/* r35 = ia_main_table - 31 * 2: movi supplies bits 16..31 and the shori
   below supplies bits 0..15.  */
1872 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r35
/* tr1 = address of the ia_loop label.  */
1873 pt/l LOCAL(ia_loop), tr1
1875 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r35
/* Reference point for every offset stored in ia_main_table.  */
1880 LOCAL(ia_main_label):
/* Per-register handlers reached through ia_main_table: the "ld" group
   stores a register and loads its address, the "push" group pushes a
   register onto the stack.  The handler bodies are mostly outside this
   excerpt.  */
1883 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
1892 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
1901 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
1910 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
1919 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
1928 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
1936 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
1944 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
1948 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
1955 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
1962 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
1969 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
1976 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
1983 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
1989 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
1995 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
/* r30 = cookie bits 1..3 taken from r0, left in place.  */
1996 andi r0, 7 << 1, r30
/* r32 = address of ia_end_of_push_seq, assembled from its upper 16 bits
   (movi) and lower 16 bits (shori).  */
1997 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r32
1999 shori LOCAL(ia_end_of_push_seq) & 65535, r32
/* NOTE(review): the matching label in the call trampoline is spelled
   ct_start_of_pop_seq; "stack" here may be a slip for "start".  It is a
   label name, so leave it alone unless every reference is checked.  */
2003 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2016 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2018 LOCAL(ia_return): /* Return. */
2020 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2021 #endif /* L_shcompact_incoming_args */
2024 #ifdef L_nested_trampoline
2026 .section .text..SHmedia32,"ax"
2030 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2031 .global GLOBAL(GCC_nested_trampoline)
2032 GLOBAL(GCC_nested_trampoline):
2048 #endif /* L_nested_trampoline */
2049 #endif /* __SH5__ */
2051 #ifdef L_push_pop_shmedia_regs
/* Register save/restore helpers, placed in the .text..SHmedia32
   section.  */
2052 .section .text..SHmedia32,"ax"
2055 #ifndef __SH4_NOFPU__
/* FPU builds only: GCC_push_shmedia_regs lowers r15 by 14 * 8 bytes and
   stores the 14 double registers dr36, dr38, ..., dr62 into that area,
   dr36 at offset 0 and dr62 at offset 13 * 8.
   NOTE(review): the listing shows no return before the _nofpu label, so
   this entry presumably continues into it -- confirm against the full
   source.  */
2056 .global GLOBAL(GCC_push_shmedia_regs)
2057 GLOBAL(GCC_push_shmedia_regs):
2058 addi.l r15, -14*8, r15
2059 fst.d r15, 13*8, dr62
2060 fst.d r15, 12*8, dr60
2061 fst.d r15, 11*8, dr58
2062 fst.d r15, 10*8, dr56
2063 fst.d r15, 9*8, dr54
2064 fst.d r15, 8*8, dr52
2065 fst.d r15, 7*8, dr50
2066 fst.d r15, 6*8, dr48
2067 fst.d r15, 5*8, dr46
2068 fst.d r15, 4*8, dr44
2069 fst.d r15, 3*8, dr42
2070 fst.d r15, 2*8, dr40
2071 fst.d r15, 1*8, dr38
2072 fst.d r15, 0*8, dr36
/* GCC_push_shmedia_regs_nofpu lowers r15 by a further 27 * 8 bytes; the
   stores that fill those 27 slots are not visible in this excerpt.  */
2074 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2075 GLOBAL(GCC_push_shmedia_regs_nofpu):
2077 addi.l r15, -27*8, r15
2110 #ifndef __SH4_NOFPU__
/* FPU builds only: GCC_pop_shmedia_regs reloads dr36 .. dr62 from slots
   27 .. 40 relative to r15, i.e. from just above the 27 slots reserved
   by the _nofpu push, matching the layout written by the push code.  */
2111 .global GLOBAL(GCC_pop_shmedia_regs)
2112 GLOBAL(GCC_pop_shmedia_regs):
2115 fld.d r15, 40*8, dr62
2116 fld.d r15, 39*8, dr60
2117 fld.d r15, 38*8, dr58
2118 fld.d r15, 37*8, dr56
2119 fld.d r15, 36*8, dr54
2120 fld.d r15, 35*8, dr52
2121 fld.d r15, 34*8, dr50
2122 fld.d r15, 33*8, dr48
2123 fld.d r15, 32*8, dr46
2124 fld.d r15, 31*8, dr44
2125 fld.d r15, 30*8, dr42
2126 fld.d r15, 29*8, dr40
2127 fld.d r15, 28*8, dr38
2128 fld.d r15, 27*8, dr36
/* GCC_pop_shmedia_regs_nofpu: body not visible in this excerpt.  */
2131 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2132 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2168 #endif /* __SH5__ == 32 */
2169 #endif /* L_push_pop_shmedia_regs */