1 /* Copyright
(C
) 1994, 1995, 1997, 1998, 1999, 2000, 2001
2 Free Software Foundation
, Inc.
4 This file is free software
; you can redistribute it and/or modify it
5 under the terms of the GNU General
Public License as published by the
6 Free Software Foundation
; either version 2, or (at your option) any
9 In addition to the permissions
in the GNU General
Public License
, the
10 Free Software Foundation gives you unlimited permission to link the
11 compiled version of
this file
into combinations with other programs
,
12 and to distribute those combinations without any restriction coming
13 from the use of
this file.
(The General
Public License restrictions
14 do apply
in other respects
; for example, they cover modification of
15 the file
, and distribution when
not linked
into a combine
18 This file is distributed
in the hope that it will be useful
, but
19 WITHOUT ANY WARRANTY
; without even the implied warranty of
20 MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General
Public License for more details.
23 You should have received a copy of the GNU General
Public License
24 along with
this program
; see the file COPYING. If not, write to
25 the Free Software Foundation
, 59 Temple Place
- Suite
330,
26 Boston
, MA
02111-1307, USA.
*/
28 !! libgcc routines for the Hitachi
/ SuperH SH CPUs.
29 !! Contributed by Steve Chamberlain.
32 !! ashiftrt_r4_x
, ___ashrsi3
, ___ashlsi3
, ___lshrsi3 routines
33 !! recoded
in assembly by Toshiyasu Morita
36 /* SH2 optimizations for ___ashrsi3
, ___ashlsi3
, ___lshrsi3
and
37 ELF
local label prefixes by J
"orn Rennecke
/* LOCAL(X): name of a local label derived from X.  Two alternative
   definitions appear here (".L_" and "L_" prefixes); the preprocessor
   conditional that selects one of them is not visible in this excerpt.  */
41 #define LOCAL(X) .L_##X
43 #define LOCAL(X) L_##X
/* CONCAT(A,B) pastes its two arguments into a single token.  */
46 #define CONCAT(A,B) A##B
/* GLOBAL0(U,X) expands to U followed by "__" and X; GLOBAL(X) supplies
   __USER_LABEL_PREFIX__ as U.  The extra macro level lets
   __USER_LABEL_PREFIX__ be expanded before the paste takes place.  */
47 #define GLOBAL0(U,X) CONCAT(U,__##X)
48 #define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)
50 #if defined __SH5__ && ! defined __SH4_NOFPU__
56 .global GLOBAL(ashiftrt_r4_0)
57 .global GLOBAL(ashiftrt_r4_1)
58 .global GLOBAL(ashiftrt_r4_2)
59 .global GLOBAL(ashiftrt_r4_3)
60 .global GLOBAL(ashiftrt_r4_4)
61 .global GLOBAL(ashiftrt_r4_5)
62 .global GLOBAL(ashiftrt_r4_6)
63 .global GLOBAL(ashiftrt_r4_7)
64 .global GLOBAL(ashiftrt_r4_8)
65 .global GLOBAL(ashiftrt_r4_9)
66 .global GLOBAL(ashiftrt_r4_10)
67 .global GLOBAL(ashiftrt_r4_11)
68 .global GLOBAL(ashiftrt_r4_12)
69 .global GLOBAL(ashiftrt_r4_13)
70 .global GLOBAL(ashiftrt_r4_14)
71 .global GLOBAL(ashiftrt_r4_15)
72 .global GLOBAL(ashiftrt_r4_16)
73 .global GLOBAL(ashiftrt_r4_17)
74 .global GLOBAL(ashiftrt_r4_18)
75 .global GLOBAL(ashiftrt_r4_19)
76 .global GLOBAL(ashiftrt_r4_20)
77 .global GLOBAL(ashiftrt_r4_21)
78 .global GLOBAL(ashiftrt_r4_22)
79 .global GLOBAL(ashiftrt_r4_23)
80 .global GLOBAL(ashiftrt_r4_24)
81 .global GLOBAL(ashiftrt_r4_25)
82 .global GLOBAL(ashiftrt_r4_26)
83 .global GLOBAL(ashiftrt_r4_27)
84 .global GLOBAL(ashiftrt_r4_28)
85 .global GLOBAL(ashiftrt_r4_29)
86 .global GLOBAL(ashiftrt_r4_30)
87 .global GLOBAL(ashiftrt_r4_31)
88 .global GLOBAL(ashiftrt_r4_32)
91 GLOBAL(ashiftrt_r4_32):
92 GLOBAL(ashiftrt_r4_31):
97 GLOBAL(ashiftrt_r4_30):
99 GLOBAL(ashiftrt_r4_29):
101 GLOBAL(ashiftrt_r4_28):
103 GLOBAL(ashiftrt_r4_27):
105 GLOBAL(ashiftrt_r4_26):
107 GLOBAL(ashiftrt_r4_25):
109 GLOBAL(ashiftrt_r4_24):
115 GLOBAL(ashiftrt_r4_23):
117 GLOBAL(ashiftrt_r4_22):
119 GLOBAL(ashiftrt_r4_21):
121 GLOBAL(ashiftrt_r4_20):
123 GLOBAL(ashiftrt_r4_19):
125 GLOBAL(ashiftrt_r4_18):
127 GLOBAL(ashiftrt_r4_17):
129 GLOBAL(ashiftrt_r4_16):
134 GLOBAL(ashiftrt_r4_15):
136 GLOBAL(ashiftrt_r4_14):
138 GLOBAL(ashiftrt_r4_13):
140 GLOBAL(ashiftrt_r4_12):
142 GLOBAL(ashiftrt_r4_11):
144 GLOBAL(ashiftrt_r4_10):
146 GLOBAL(ashiftrt_r4_9):
148 GLOBAL(ashiftrt_r4_8):
150 GLOBAL(ashiftrt_r4_7):
152 GLOBAL(ashiftrt_r4_6):
154 GLOBAL(ashiftrt_r4_5):
156 GLOBAL(ashiftrt_r4_4):
158 GLOBAL(ashiftrt_r4_3):
160 GLOBAL(ashiftrt_r4_2):
162 GLOBAL(ashiftrt_r4_1):
166 GLOBAL(ashiftrt_r4_0):
190 .global GLOBAL(ashrsi3)
195 mova LOCAL(ashrsi3_table),r0
/* Offset table for ashrsi3: 32 one-byte entries, where entry n holds the
   distance of LOCAL(ashrsi3_n) from the start of this table.
   NOTE(review): presumably indexed by the shift count; the code that
   reads the table is not visible here -- confirm.  */
206 LOCAL(ashrsi3_table):
207 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
208 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
209 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
210 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
211 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
212 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
213 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
214 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
215 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
216 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
217 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
218 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
219 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
220 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
221 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
222 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
223 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
224 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
225 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
226 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
227 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
228 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
229 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
230 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
231 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
232 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
233 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
234 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
235 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
236 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
237 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
238 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
338 .global GLOBAL(ashlsi3)
343 mova LOCAL(ashlsi3_table),r0
/* Offset table for ashlsi3: 32 one-byte entries, where entry n holds the
   distance of LOCAL(ashlsi3_n) from the start of this table.
   NOTE(review): presumably indexed by the shift count; the code that
   reads the table is not visible here -- confirm.  */
354 LOCAL(ashlsi3_table):
355 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
356 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
357 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
358 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
359 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
360 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
361 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
362 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
363 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
364 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
365 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
366 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
367 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
368 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
369 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
370 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
371 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
372 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
373 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
374 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
375 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
376 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
377 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
378 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
379 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
380 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
381 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
382 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
383 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
384 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
385 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
386 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
495 .global GLOBAL(lshrsi3)
500 mova LOCAL(lshrsi3_table),r0
/* Offset table for lshrsi3: 32 one-byte entries, where entry n holds the
   distance of LOCAL(lshrsi3_n) from the start of this table.
   NOTE(review): presumably indexed by the shift count; the code that
   reads the table is not visible here -- confirm.  */
511 LOCAL(lshrsi3_table):
512 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
513 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
514 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
515 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
516 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
517 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
518 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
519 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
520 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
521 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
522 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
523 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
524 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
525 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
526 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
527 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
528 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
529 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
530 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
531 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
532 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
533 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
534 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
535 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
536 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
537 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
538 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
539 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
540 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
541 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
542 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
543 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
636 ! done all the large groups, do the remainder
641 mova GLOBAL(movstrSI0),r0
647 .global GLOBAL(movstrSI64)
651 .global GLOBAL(movstrSI60)
655 .global GLOBAL(movstrSI56)
659 .global GLOBAL(movstrSI52)
663 .global GLOBAL(movstrSI48)
667 .global GLOBAL(movstrSI44)
671 .global GLOBAL(movstrSI40)
675 .global GLOBAL(movstrSI36)
679 .global GLOBAL(movstrSI32)
683 .global GLOBAL(movstrSI28)
687 .global GLOBAL(movstrSI24)
691 .global GLOBAL(movstrSI20)
695 .global GLOBAL(movstrSI16)
699 .global GLOBAL(movstrSI12)
703 .global GLOBAL(movstrSI8)
707 .global GLOBAL(movstrSI4)
717 .global GLOBAL(movstr)
778 .global GLOBAL(movstr_i4_even)
779 .global GLOBAL(movstr_i4_odd)
780 .global GLOBAL(movstrSI12_i4)
790 GLOBAL(movstr_i4_odd):
802 bt/s L_movstr_2mod4_end
816 GLOBAL(movstr_i4_even):
818 bra L_movstr_start_even
822 GLOBAL(movstrSI12_i4):
/* mulsi3: 32x32 -> 32 bit multiply built from 16x16 partial products.
   Notation in the comments below: r4 = aabb and r5 = ccdd, where each
   letter pair is one 16-bit half of the register.  */
835 .global GLOBAL(mulsi3)
839 ! r0 = aabb*ccdd via partial products
841 ! if aa == 0 and cc == 0
845 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
849 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
850 mov r5,r3 ! r3 = ccdd
851 swap.w r4,r2 ! r2 = bbaa
852 xtrct r2,r3 ! r3 = aacc
853 tst r3,r3 ! msws zero ?
855 rts ! yes - then we have the answer
858 hiset: sts macl,r0 ! r0 = bb*dd
859 mulu.w r2,r5 ! brewing macl = aa*dd
861 mulu.w r3,r4 ! brewing macl = cc*bb
870 #endif /* ! __SH5__ */
873 !! 4 byte integer Divide code for the Hitachi SH
875 !! args in r4 and r5, result in fpul, clobber dr0, dr2
877 .global GLOBAL(sdivsi3_i4)
887 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
888 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
890 #if ! __SH5__ || __SH5__ == 32
894 .global GLOBAL(sdivsi3_i4)
909 #endif /* ! __SH5__ || __SH5__ == 32 */
910 #endif /* ! __SH4__ */
914 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
916 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
923 !! args in r4 and r5, result in r0 clobber r1,r2,r3
925 .global GLOBAL(sdivsi3)
928 .section .text..SHmedia32,"ax"
933 /* The assembly code that follows is a hand-optimized version of the C
934 code that follows. Note that the registers that are modified are
935 exactly those listed as clobbered in the patterns divsi3_i1 and
941 register unsigned long long r18 asm ("r18
");
942 register unsigned long long r19 asm ("r19
");
943 register unsigned long long r0 asm ("r0
") = 0;
944 register unsigned long long r1 asm ("r1
") = 1;
945 register int r2 asm ("r2
") = i >> 31;
946 register int r3 asm ("r3
") = j >> 31;
958 r0 |= r1, r18 -= r19;
959 while (r19 >>= 1, r1 >>= 1);
965 pt/l LOCAL(sdivsi3_dontadd), tr2
966 pt/l LOCAL(sdivsi3_loop), tr1
983 LOCAL(sdivsi3_dontadd):
1075 #endif /* ! __SHMEDIA__ */
1076 #endif /* ! __SH4__ */
1081 !! 4 byte integer Divide code for the Hitachi SH
1083 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1085 .global GLOBAL(udivsi3_i4)
1097 #ifdef __LITTLE_ENDIAN__
1121 .align 3 ! make double below 8 byte aligned.
1126 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1127 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1129 #if ! __SH5__ || __SH5__ == 32
1133 .global GLOBAL(udivsi3_i4)
1147 #ifdef __LITTLE_ENDIAN__
1167 .align 3 ! make double below 8 byte aligned.
1182 #endif /* ! __SH5__ || __SH5__ == 32 */
1183 #endif /* ! __SH4__ */
1187 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1189 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1191 !! Steve Chamberlain
1196 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1197 .global GLOBAL(udivsi3)
1201 .section .text..SHmedia32,"ax"
1206 /* The assembly code that follows is a hand-optimized version of the C
1207 code that follows. Note that the registers that are modified are
1208 exactly those listed as clobbered in the patterns udivsi3_i1 and
1215 register unsigned long long r0 asm ("r0
") = 0;
1216 register unsigned long long r18 asm ("r18
") = 1;
1217 register unsigned long long r4 asm ("r4
") = i;
1218 register unsigned long long r19 asm ("r19
") = j;
1224 r0 |= r18, r4 -= r19;
1225 while (r19 >>= 1, r18 >>= 1);
1231 pt/l LOCAL(udivsi3_dontadd), tr2
1232 pt/l LOCAL(udivsi3_loop), tr1
1240 LOCAL(udivsi3_loop):
1244 LOCAL(udivsi3_dontadd):
1254 ! get one bit from the msb of the numerator into the T
1255 ! bit and divide it by the value in r5. Put the answer bit
1256 ! into the T bit so it can come out again at the bottom
! The rotcl/div1 step is written out 32 times below; each source line
! holds both instructions, separated by a semicolon.
1258 rotcl r4 ; div1 r5,r0
1259 rotcl r4 ; div1 r5,r0
1260 rotcl r4 ; div1 r5,r0
1261 rotcl r4 ; div1 r5,r0
1262 rotcl r4 ; div1 r5,r0
1263 rotcl r4 ; div1 r5,r0
1264 rotcl r4 ; div1 r5,r0
1265 rotcl r4 ; div1 r5,r0
1267 rotcl r4 ; div1 r5,r0
1268 rotcl r4 ; div1 r5,r0
1269 rotcl r4 ; div1 r5,r0
1270 rotcl r4 ; div1 r5,r0
1271 rotcl r4 ; div1 r5,r0
1272 rotcl r4 ; div1 r5,r0
1273 rotcl r4 ; div1 r5,r0
1274 rotcl r4 ; div1 r5,r0
1276 rotcl r4 ; div1 r5,r0
1277 rotcl r4 ; div1 r5,r0
1278 rotcl r4 ; div1 r5,r0
1279 rotcl r4 ; div1 r5,r0
1280 rotcl r4 ; div1 r5,r0
1281 rotcl r4 ; div1 r5,r0
1282 rotcl r4 ; div1 r5,r0
1283 rotcl r4 ; div1 r5,r0
1286 rotcl r4 ; div1 r5,r0
1287 rotcl r4 ; div1 r5,r0
1288 rotcl r4 ; div1 r5,r0
1289 rotcl r4 ; div1 r5,r0
1290 rotcl r4 ; div1 r5,r0
1291 rotcl r4 ; div1 r5,r0
1292 rotcl r4 ; div1 r5,r0
1293 rotcl r4 ; div1 r5,r0
1298 #endif /* ! __SHMEDIA__ */
1299 #endif /* ! __SH4__ */
1302 #if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1306 .global GLOBAL(set_fpscr)
1309 mov.l LOCAL(set_fpscr_L1),r1
1315 #if defined(__SH4__)
1318 #else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1327 #if defined(__SH4__)
1331 #else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1337 LOCAL(set_fpscr_L1):
1338 .long GLOBAL(fpscr_values)
1340 .comm GLOBAL(fpscr_values),8,4
1342 .comm GLOBAL(fpscr_values),8
1344 #endif /* SH3E / SH4 */
1345 #endif /* L_set_fpscr */
1346 #ifdef L_ic_invalidate
1349 .section .text..SHmedia32,"ax"
1351 .global GLOBAL(ic_invalidate)
1352 GLOBAL(ic_invalidate):
1357 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
1358 .global GLOBAL(ic_invalidate)
1359 GLOBAL(ic_invalidate):
1363 /* Compute how many cache lines 0f is away from r4. */
1366 /* Prepare to branch to 0f plus the cache-line offset. */
1373 /* This must be aligned to the beginning of a cache line. */
1375 .rept 256 /* There are 256 cache lines of 32 bytes. */
1382 #endif /* L_ic_invalidate */
1384 #if defined (__SH5__) && __SH5__ == 32
1385 #ifdef L_shcompact_call_trampoline
/* Offset table for the SHcompact call trampoline: each .word entry is
   the distance of one handler label from LOCAL(ct_main_label).  There
   are 33 entries; ct_pop_seq is listed twice in a row and ct_r9_pop
   appears at two positions.  */
1388 LOCAL(ct_main_table):
1389 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
1390 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
1391 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
1392 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
1393 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
1394 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
1395 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
1396 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
1397 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
1398 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
1399 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
1400 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
1401 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
1402 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
1403 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
1404 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
1405 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
1406 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
1407 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
1408 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
1409 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
1410 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
1411 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
1412 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
1413 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
1414 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
1415 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
1416 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
1417 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
1418 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
1419 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
1420 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
1421 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
1423 .section .text..SHmedia32, "ax"
1426 /* This function loads 64-bit general-purpose registers from the
1427 stack, from a memory address contained in them or from an FP
1428 register, according to a cookie passed in r1. Its execution
1429 time is linear in the number of registers that actually have
1430 to be copied. See sh.h for details on the actual bit pattern.
1432 The function to be called is passed in r0. If a 32-bit return
1433 value is expected, the actual function will be tail-called,
1434 otherwise the return address will be stored in r10 (that the
1435 caller should expect to be clobbered) and the return value
1436 will be expanded into r2/r3 upon return. */
1438 .global GLOBAL(GCC_shcompact_call_trampoline)
1439 GLOBAL(GCC_shcompact_call_trampoline):
1440 ptabs/l r0, tr0 /* Prepare to call the actual function. */
1441 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
1442 pt/l LOCAL(ct_loop), tr1
1444 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
1449 LOCAL(ct_main_label):
1452 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
1453 /* It must be dr0, so just do it. */
1459 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
1460 /* It is either dr0 or dr2. */
1469 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
1470 shlri r1, 23 - 3, r34
1471 andi r34, 3 << 3, r33
1472 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
1473 LOCAL(ct_r4_fp_base):
1479 LOCAL(ct_r4_fp_copy):
1486 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
1487 shlri r1, 20 - 3, r34
1488 andi r34, 3 << 3, r33
1489 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
1490 LOCAL(ct_r5_fp_base):
1496 LOCAL(ct_r5_fp_copy):
1505 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
1506 /* It must be dr8. */
1512 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
1513 shlri r1, 16 - 3, r34
1514 andi r34, 3 << 3, r33
1515 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
1516 LOCAL(ct_r6_fp_base):
1522 LOCAL(ct_r6_fp_copy):
1531 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
1532 /* It is either dr8 or dr10. */
1540 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
1541 shlri r1, 12 - 3, r34
1542 andi r34, 3 << 3, r33
1543 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
1544 LOCAL(ct_r7_fp_base):
1549 LOCAL(ct_r7_fp_copy):
1558 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
1559 /* It is either dr8 or dr10. */
1561 andi r1, 1 << 8, r32
1567 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
1568 shlri r1, 8 - 3, r34
1569 andi r34, 3 << 3, r33
1570 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
1571 LOCAL(ct_r8_fp_base):
1576 LOCAL(ct_r8_fp_copy):
1585 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
1586 /* It is either dr8 or dr10. */
1588 andi r1, 1 << 4, r32
1594 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
1595 shlri r1, 4 - 3, r34
1596 andi r34, 3 << 3, r33
1597 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
1598 LOCAL(ct_r9_fp_base):
1603 LOCAL(ct_r9_fp_copy):
1612 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
1613 pt/l LOCAL(ct_r2_load), tr2
1622 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
1623 pt/l LOCAL(ct_r3_load), tr2
1631 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
1632 pt/l LOCAL(ct_r4_load), tr2
1640 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
1641 pt/l LOCAL(ct_r5_load), tr2
1649 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
1650 pt/l LOCAL(ct_r6_load), tr2
1657 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
1658 pt/l LOCAL(ct_r7_load), tr2
1665 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
1666 pt/l LOCAL(ct_r8_load), tr2
1673 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
1674 pt/l LOCAL(ct_check_tramp), tr2
1698 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
1705 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
1712 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
1719 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
1726 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
1733 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
1739 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
1745 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
1746 andi r1, 7 << 1, r30
1747 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
1749 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
1753 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
1766 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
1769 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
1770 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
1771 pt/u LOCAL(ct_ret_wide), tr2
1774 LOCAL(ct_call_func): /* Just branch to the function. */
1776 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
1777 64-bit return value. */
1781 #if __LITTLE_ENDIAN__
1789 #endif /* L_shcompact_call_trampoline */
1791 #ifdef L_shcompact_return_trampoline
1792 /* This function does the converse of the code in `ret_wide'
1793 above. It is tail-called by SHcompact functions returning
1794 64-bit non-floating-point values, to pack the 32-bit values in
1795 r2 and r3 into r2. */
1798 .section .text..SHmedia32, "ax"
1800 .global GLOBAL(GCC_shcompact_return_trampoline)
1801 GLOBAL(GCC_shcompact_return_trampoline):
1803 #if __LITTLE_ENDIAN__
1812 #endif /* L_shcompact_return_trampoline */
1814 #ifdef L_shcompact_incoming_args
/* Offset table for GCC_shcompact_incoming_args: each valid .word entry
   is the distance of one handler label from LOCAL(ia_main_label).
   Entries holding the literal 1 mark invalid slots ("just loop").  There
   are 33 entries; ia_push_seq and ia_return are each listed twice in a
   row and ia_r9_push appears at two positions.  */
1817 LOCAL(ia_main_table):
1818 .word 1 /* Invalid, just loop */
1819 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
1820 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
1821 .word 1 /* Invalid, just loop */
1822 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
1823 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
1824 .word 1 /* Invalid, just loop */
1825 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
1826 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
1827 .word 1 /* Invalid, just loop */
1828 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
1829 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
1830 .word 1 /* Invalid, just loop */
1831 .word 1 /* Invalid, just loop */
1832 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
1833 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
1834 .word 1 /* Invalid, just loop */
1835 .word 1 /* Invalid, just loop */
1836 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
1837 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
1838 .word 1 /* Invalid, just loop */
1839 .word 1 /* Invalid, just loop */
1840 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
1841 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
1842 .word 1 /* Invalid, just loop */
1843 .word 1 /* Invalid, just loop */
1844 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
1845 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
1846 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
1847 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
1848 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
1849 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
1850 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
1852 .section .text..SHmedia32, "ax"
1855 /* This function stores 64-bit general-purpose registers back in
1856 the stack, starting at @(r1), where the cookie is supposed to
1857 have been stored, and loads the address in which each register
1858 was stored into itself. Its execution time is linear in the
1859 number of registers that actually have to be copied, and it is
1860 optimized for structures larger than 64 bits, as opposed to
1861 individual `long long' arguments. See sh.h for details on the
1862 actual bit pattern. */
1864 .global GLOBAL(GCC_shcompact_incoming_args)
1865 GLOBAL(GCC_shcompact_incoming_args):
1866 ptabs/l r18, tr0 /* Prepare to return. */
1867 shlri r17, 32, r0 /* Load the cookie. */
1868 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r35
1869 pt/l LOCAL(ia_loop), tr1
1871 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r35
1876 LOCAL(ia_main_label):
1879 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
1888 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
1897 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
1906 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
1915 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
1924 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
1932 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
1940 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
1944 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
1951 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
1958 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
1965 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
1972 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
1979 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
1985 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
1991 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
1992 andi r0, 7 << 1, r30
1993 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r32
1995 shori LOCAL(ia_end_of_push_seq) & 65535, r32
1999 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2012 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2014 LOCAL(ia_return): /* Return. */
2016 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2017 #endif /* L_shcompact_incoming_args */
2020 #ifdef L_nested_trampoline
2022 .section .text..SHmedia32,"ax"
2026 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2027 .global GLOBAL(GCC_nested_trampoline)
2028 GLOBAL(GCC_nested_trampoline):
2044 #endif /* L_nested_trampoline */
2045 #endif /* __SH5__ */
2047 #ifdef L_push_pop_shmedia_regs
2048 .section .text..SHmedia32,"ax"
2051 #ifndef __SH4_NOFPU__
/* GCC_push_shmedia_regs: subtracts 14*8 from r15 and stores the 14
   even-numbered double registers dr36..dr62 into the area just
   reserved, dr36 at offset 0*8 up to dr62 at offset 13*8.
   NOTE(review): what follows the last store is not visible in this
   excerpt; presumably control continues into
   GCC_push_shmedia_regs_nofpu -- confirm.  */
2052 .global GLOBAL(GCC_push_shmedia_regs)
2053 GLOBAL(GCC_push_shmedia_regs):
2054 addi.l r15, -14*8, r15
2055 fst.d r15, 13*8, dr62
2056 fst.d r15, 12*8, dr60
2057 fst.d r15, 11*8, dr58
2058 fst.d r15, 10*8, dr56
2059 fst.d r15, 9*8, dr54
2060 fst.d r15, 8*8, dr52
2061 fst.d r15, 7*8, dr50
2062 fst.d r15, 6*8, dr48
2063 fst.d r15, 5*8, dr46
2064 fst.d r15, 4*8, dr44
2065 fst.d r15, 3*8, dr42
2066 fst.d r15, 2*8, dr40
2067 fst.d r15, 1*8, dr38
2068 fst.d r15, 0*8, dr36
2070 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2071 GLOBAL(GCC_push_shmedia_regs_nofpu):
2073 addi.l r15, -27*8, r15
2106 #ifndef __SH4_NOFPU__
/* GCC_pop_shmedia_regs: reloads the 14 even-numbered double registers
   dr36..dr62 from r15-relative slots 27*8 (dr36) through 40*8 (dr62).
   NOTE(review): the first slot, 27, equals the 27*8 bytes reserved by
   GCC_push_shmedia_regs_nofpu, so the floating-point save area
   presumably sits directly above the integer one -- confirm.  */
2107 .global GLOBAL(GCC_pop_shmedia_regs)
2108 GLOBAL(GCC_pop_shmedia_regs):
2111 fld.d r15, 40*8, dr62
2112 fld.d r15, 39*8, dr60
2113 fld.d r15, 38*8, dr58
2114 fld.d r15, 37*8, dr56
2115 fld.d r15, 36*8, dr54
2116 fld.d r15, 35*8, dr52
2117 fld.d r15, 34*8, dr50
2118 fld.d r15, 33*8, dr48
2119 fld.d r15, 32*8, dr46
2120 fld.d r15, 31*8, dr44
2121 fld.d r15, 30*8, dr42
2122 fld.d r15, 29*8, dr40
2123 fld.d r15, 28*8, dr38
2124 fld.d r15, 27*8, dr36
2127 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2128 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2164 #endif /* __SH5__ == 32 */
2165 #endif /* L_push_pop_shmedia_regs */