1 /* Copyright
(C
) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002
2 Free Software Foundation
, Inc.
4 This file is free software
; you can redistribute it and/or modify it
5 under the terms of the GNU General
Public License as published by the
6 Free Software Foundation
; either version 2, or (at your option) any
9 In addition to the permissions
in the GNU General
Public License
, the
10 Free Software Foundation gives you unlimited permission to link the
11 compiled version of
this file
into combinations with other programs
,
12 and to distribute those combinations without any restriction coming
13 from the use of
this file.
(The General
Public License restrictions
14 do apply
in other respects
; for example, they cover modification of
15 the file
, and distribution when
not linked
into a combine
18 This file is distributed
in the hope that it will be useful
, but
19 WITHOUT ANY WARRANTY
; without even the implied warranty of
20 MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General
Public License for more details.
23 You should have received a copy of the GNU General
Public License
24 along with
this program
; see the file COPYING. If not, write to
25 the Free Software Foundation
, 59 Temple Place
- Suite
330,
26 Boston
, MA
02111-1307, USA.
*/
28 !! libgcc routines for the Hitachi
/ SuperH SH CPUs.
29 !! Contributed by Steve Chamberlain.
32 !! ashiftrt_r4_x
, ___ashrsi3
, ___ashlsi3
, ___lshrsi3 routines
33 !! recoded
in assembly by Toshiyasu Morita
36 /* SH2 optimizations for ___ashrsi3
, ___ashlsi3
, ___lshrsi3
and
37 ELF
local label prefixes by J
"orn Rennecke
41 #define LOCAL(X) .L_##X
43 #define LOCAL(X) L_##X
46 #define CONCAT(A,B) A##B
47 #define GLOBAL0(U,X) CONCAT(U,__##X)
48 #define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)
50 #if defined __SH5__ && ! defined __SH4_NOFPU__ && ! defined (__LITTLE_ENDIAN__)
56 .global GLOBAL(ashiftrt_r4_0)
57 .global GLOBAL(ashiftrt_r4_1)
58 .global GLOBAL(ashiftrt_r4_2)
59 .global GLOBAL(ashiftrt_r4_3)
60 .global GLOBAL(ashiftrt_r4_4)
61 .global GLOBAL(ashiftrt_r4_5)
62 .global GLOBAL(ashiftrt_r4_6)
63 .global GLOBAL(ashiftrt_r4_7)
64 .global GLOBAL(ashiftrt_r4_8)
65 .global GLOBAL(ashiftrt_r4_9)
66 .global GLOBAL(ashiftrt_r4_10)
67 .global GLOBAL(ashiftrt_r4_11)
68 .global GLOBAL(ashiftrt_r4_12)
69 .global GLOBAL(ashiftrt_r4_13)
70 .global GLOBAL(ashiftrt_r4_14)
71 .global GLOBAL(ashiftrt_r4_15)
72 .global GLOBAL(ashiftrt_r4_16)
73 .global GLOBAL(ashiftrt_r4_17)
74 .global GLOBAL(ashiftrt_r4_18)
75 .global GLOBAL(ashiftrt_r4_19)
76 .global GLOBAL(ashiftrt_r4_20)
77 .global GLOBAL(ashiftrt_r4_21)
78 .global GLOBAL(ashiftrt_r4_22)
79 .global GLOBAL(ashiftrt_r4_23)
80 .global GLOBAL(ashiftrt_r4_24)
81 .global GLOBAL(ashiftrt_r4_25)
82 .global GLOBAL(ashiftrt_r4_26)
83 .global GLOBAL(ashiftrt_r4_27)
84 .global GLOBAL(ashiftrt_r4_28)
85 .global GLOBAL(ashiftrt_r4_29)
86 .global GLOBAL(ashiftrt_r4_30)
87 .global GLOBAL(ashiftrt_r4_31)
88 .global GLOBAL(ashiftrt_r4_32)
91 GLOBAL(ashiftrt_r4_32):
92 GLOBAL(ashiftrt_r4_31):
97 GLOBAL(ashiftrt_r4_30):
99 GLOBAL(ashiftrt_r4_29):
101 GLOBAL(ashiftrt_r4_28):
103 GLOBAL(ashiftrt_r4_27):
105 GLOBAL(ashiftrt_r4_26):
107 GLOBAL(ashiftrt_r4_25):
109 GLOBAL(ashiftrt_r4_24):
115 GLOBAL(ashiftrt_r4_23):
117 GLOBAL(ashiftrt_r4_22):
119 GLOBAL(ashiftrt_r4_21):
121 GLOBAL(ashiftrt_r4_20):
123 GLOBAL(ashiftrt_r4_19):
125 GLOBAL(ashiftrt_r4_18):
127 GLOBAL(ashiftrt_r4_17):
129 GLOBAL(ashiftrt_r4_16):
134 GLOBAL(ashiftrt_r4_15):
136 GLOBAL(ashiftrt_r4_14):
138 GLOBAL(ashiftrt_r4_13):
140 GLOBAL(ashiftrt_r4_12):
142 GLOBAL(ashiftrt_r4_11):
144 GLOBAL(ashiftrt_r4_10):
146 GLOBAL(ashiftrt_r4_9):
148 GLOBAL(ashiftrt_r4_8):
150 GLOBAL(ashiftrt_r4_7):
152 GLOBAL(ashiftrt_r4_6):
154 GLOBAL(ashiftrt_r4_5):
156 GLOBAL(ashiftrt_r4_4):
158 GLOBAL(ashiftrt_r4_3):
160 GLOBAL(ashiftrt_r4_2):
162 GLOBAL(ashiftrt_r4_1):
166 GLOBAL(ashiftrt_r4_0):
190 .global GLOBAL(ashrsi3)
195 mova LOCAL(ashrsi3_table),r0
/* Offset table for the arithmetic-right-shift routine: 32 one-byte
entries, where entry N holds the distance from the start of this table
to LOCAL(ashrsi3_N). Every entry is emitted with .byte, so each target
label has to stay within the range of a one-byte offset from the table.
NOTE(review): the code that indexes this table is not part of this
excerpt (only the mova that takes the table address is visible) --
confirm how the entry is loaded and extended before moving labels. */
206 LOCAL(ashrsi3_table):
207 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
208 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
209 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
210 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
211 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
212 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
213 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
214 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
215 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
216 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
217 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
218 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
219 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
220 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
221 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
222 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
223 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
224 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
225 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
226 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
227 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
228 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
229 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
230 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
231 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
232 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
233 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
234 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
235 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
236 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
237 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
238 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
338 .global GLOBAL(ashlsi3)
343 mova LOCAL(ashlsi3_table),r0
/* Offset table for the left-shift routine: 32 one-byte entries, where
entry N holds the distance from the start of this table to
LOCAL(ashlsi3_N). Every entry is emitted with .byte, so each target
label has to stay within the range of a one-byte offset from the table.
NOTE(review): the code that indexes this table is not part of this
excerpt (only the mova that takes the table address is visible) --
confirm how the entry is loaded and extended before moving labels. */
354 LOCAL(ashlsi3_table):
355 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
356 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
357 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
358 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
359 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
360 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
361 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
362 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
363 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
364 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
365 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
366 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
367 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
368 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
369 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
370 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
371 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
372 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
373 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
374 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
375 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
376 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
377 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
378 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
379 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
380 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
381 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
382 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
383 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
384 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
385 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
386 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
495 .global GLOBAL(lshrsi3)
500 mova LOCAL(lshrsi3_table),r0
/* Offset table for the logical-right-shift routine: 32 one-byte
entries, where entry N holds the distance from the start of this table
to LOCAL(lshrsi3_N). Every entry is emitted with .byte, so each target
label has to stay within the range of a one-byte offset from the table.
NOTE(review): the code that indexes this table is not part of this
excerpt (only the mova that takes the table address is visible) --
confirm how the entry is loaded and extended before moving labels. */
511 LOCAL(lshrsi3_table):
512 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
513 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
514 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
515 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
516 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
517 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
518 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
519 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
520 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
521 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
522 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
523 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
524 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
525 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
526 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
527 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
528 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
529 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
530 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
531 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
532 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
533 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
534 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
535 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
536 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
537 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
538 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
539 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
540 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
541 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
542 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
543 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
636 ! done all the large groups, do the remainder
641 mova GLOBAL(movstrSI0),r0
647 .global GLOBAL(movstrSI64)
651 .global GLOBAL(movstrSI60)
655 .global GLOBAL(movstrSI56)
659 .global GLOBAL(movstrSI52)
663 .global GLOBAL(movstrSI48)
667 .global GLOBAL(movstrSI44)
671 .global GLOBAL(movstrSI40)
675 .global GLOBAL(movstrSI36)
679 .global GLOBAL(movstrSI32)
683 .global GLOBAL(movstrSI28)
687 .global GLOBAL(movstrSI24)
691 .global GLOBAL(movstrSI20)
695 .global GLOBAL(movstrSI16)
699 .global GLOBAL(movstrSI12)
703 .global GLOBAL(movstrSI8)
707 .global GLOBAL(movstrSI4)
717 .global GLOBAL(movstr)
778 .global GLOBAL(movstr_i4_even)
779 .global GLOBAL(movstr_i4_odd)
780 .global GLOBAL(movstrSI12_i4)
790 GLOBAL(movstr_i4_odd):
802 bt/s L_movstr_2mod4_end
816 GLOBAL(movstr_i4_even):
818 bra L_movstr_start_even
822 GLOBAL(movstrSI12_i4):
835 .global GLOBAL(mulsi3)
839 ! r0 = aabb*ccdd via partial products
841 ! if aa == 0 and cc == 0
845 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
849 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
850 mov r5,r3 ! r3 = ccdd
851 swap.w r4,r2 ! r2 = bbaa
852 xtrct r2,r3 ! r3 = aacc
853 tst r3,r3 ! msws zero ?
855 rts ! yes - then we have the answer
858 hiset: sts macl,r0 ! r0 = bb*dd
859 mulu.w r2,r5 ! brewing macl = aa*dd
861 mulu.w r3,r4 ! brewing macl = cc*bb
870 #endif /* ! __SH5__ */
873 !! 4 byte integer Divide code for the Hitachi SH
875 !! args in r4 and r5, result in fpul, clobber dr0, dr2
877 .global GLOBAL(sdivsi3_i4)
887 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
888 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
890 #if ! __SH5__ || __SH5__ == 32
894 .global GLOBAL(sdivsi3_i4)
909 #endif /* ! __SH5__ || __SH5__ == 32 */
910 #endif /* ! __SH4__ */
914 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
916 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
923 !! args in r4 and r5, result in r0 clobber r1,r2,r3
925 .global GLOBAL(sdivsi3)
928 .section .text..SHmedia32,"ax"
934 /* The assembly code that follows is a hand-optimized version of the C
935 code that follows. Note that the registers that are modified are
936 exactly those listed as clobbered in the patterns divsi3_i1 and
942 register unsigned long long r18 asm ("r18
");
943 register unsigned long long r19 asm ("r19
");
944 register unsigned long long r0 asm ("r0
") = 0;
945 register unsigned long long r1 asm ("r1
") = 1;
946 register int r2 asm ("r2
") = i >> 31;
947 register int r3 asm ("r3
") = j >> 31;
959 r0 |= r1, r18 -= r19;
960 while (r19 >>= 1, r1 >>= 1);
966 pt/l LOCAL(sdivsi3_dontadd), tr2
967 pt/l LOCAL(sdivsi3_loop), tr1
984 LOCAL(sdivsi3_dontadd):
993 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
996 // can create absolute value without extra latency,
997 // but dependent on proper sign extension of inputs:
1000 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1003 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1004 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1012 // If r4 was to be used in-place instead of r21, could use this sequence
1013 // to compute absolute:
1014 // sub r63,r4,r19 // compute absolute value of r4
1015 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1016 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1028 mmacnfx.wl r25,r2,r1
1054 #elif defined __SHMEDIA__
1055 /* m5compact-nofpu */
1056 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1058 .section .text..SHmedia32,"ax"
1061 pt/l LOCAL(sdivsi3_dontsub), tr0
1062 pt/l LOCAL(sdivsi3_loop), tr1
1074 LOCAL(sdivsi3_loop):
1078 LOCAL(sdivsi3_dontsub):
1084 #else /* ! __SHMEDIA__ */
1169 #endif /* ! __SHMEDIA__ */
1170 #endif /* ! __SH4__ */
1175 !! 4 byte integer Divide code for the Hitachi SH
1177 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1179 .global GLOBAL(udivsi3_i4)
1191 #ifdef __LITTLE_ENDIAN__
1215 .align 3 ! make double below 8 byte aligned.
1220 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1221 #if ! __SH5__ || __SH5__ == 32
1222 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1224 .global GLOBAL(udivsi3_i4)
1237 #endif /* ! __SH5__ || __SH5__ == 32 */
1238 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1239 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1241 .global GLOBAL(udivsi3_i4)
1255 #ifdef __LITTLE_ENDIAN__
1275 .align 3 ! make double below 8 byte aligned.
1290 #endif /* ! __SH4__ */
1294 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1296 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1298 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1299 .global GLOBAL(udivsi3)
1303 .section .text..SHmedia32,"ax"
1309 /* The assembly code that follows is a hand-optimized version of the C
1310 code that follows. Note that the registers that are modified are
1311 exactly those listed as clobbered in the patterns udivsi3_i1 and
1318 register unsigned long long r0 asm ("r0
") = 0;
1319 register unsigned long long r18 asm ("r18
") = 1;
1320 register unsigned long long r4 asm ("r4
") = i;
1321 register unsigned long long r19 asm ("r19
") = j;
1327 r0 |= r18, r4 -= r19;
1328 while (r19 >>= 1, r18 >>= 1);
1334 pt/l LOCAL(udivsi3_dontadd), tr2
1335 pt/l LOCAL(udivsi3_loop), tr1
1343 LOCAL(udivsi3_loop):
1347 LOCAL(udivsi3_dontadd):
1355 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1361 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1363 mmulfx.w r21,r21,r19
1364 mshflo.w r21,r63,r21
1366 mmulfx.w r25,r19,r19
1370 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1371 before the msub.w, but we need a different value for
1372 r19 to keep errors under control. */
1374 mmulfx.w r19,r19,r19
1378 mmacnfx.wl r25,r19,r21
1403 #elif defined (__SHMEDIA__)
1404 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1405 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1406 So use a short shmedia loop. */
1407 // clobbered: r20,r21,r25,tr0,tr1,tr2
1409 .section .text..SHmedia32,"ax"
1412 pt/l LOCAL(udivsi3_dontsub), tr0
1413 pt/l LOCAL(udivsi3_loop), tr1
1418 LOCAL(udivsi3_loop):
1422 LOCAL(udivsi3_dontsub):
1427 #else /* ! defined (__SHMEDIA__) */
1431 div1 r5,r4; div1 r5,r4; div1 r5,r4
1432 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1435 div1 r5,r4; rotcl r0
1436 div1 r5,r4; rotcl r0
1437 div1 r5,r4; rotcl r0
1445 bf LOCAL(large_divisor)
1447 bf/s LOCAL(large_divisor)
1469 LOCAL(large_divisor):
1488 #endif /* ! __SHMEDIA__ */
1489 #endif /* __SH4__ */
1490 #endif /* L_udivsi3 */
1495 .section .text..SHmedia32,"ax"
1497 .global GLOBAL(udivdi3)
1503 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1507 sub r63,r22,r20 // r63 == 64 % 64
1509 pta LOCAL(large_divisor),tr0
1515 bgt/u r9,r63,tr0 // large_divisor
1524 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1525 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1526 the case may be, %0000000000000000 000.11111111111, still */
1527 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1532 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1534 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1543 mcmpgt.l r21,r63,r21 // See Note 1
1545 mshfhi.l r63,r21,r21
1549 /* small divisor: need a third divide step */
1559 /* could test r3 here to check for divide by zero. */
1562 LOCAL(large_divisor):
1571 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1572 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1573 the case may be, %0000000000000000 000.11111111111, still */
1574 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1579 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1581 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1585 pta LOCAL(no_lo_adj),tr0
1592 bgtu/u r7,r25,tr0 // no_lo_adj
1598 /* large_divisor: only needs a few adjustments. */
1605 /* Note 1: To shift the result of the second divide stage so that the result
1606 always fits into 32 bits, yet we still reduce the rest sufficiently
1607 would require a lot of instructions to do the shifts just right. Using
1608 the full 64 bit shift result to multiply with the divisor would require
1609 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1610 Fortunately, if the upper 32 bits of the shift result are non-zero, we
1611 know that the rest after taking this partial result into account will
1612 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1613 upper 32 bits of the partial result are non-zero. */
1614 #endif /* __SHMEDIA__ */
1615 #endif /* L_udivdi3 */
1620 .section .text..SHmedia32,"ax"
1622 .global GLOBAL(divdi3)
1624 pta GLOBAL(udivdi3),tr0
1636 #endif /* __SHMEDIA__ */
1637 #endif /* L_divdi3 */
1642 .section .text..SHmedia32,"ax"
1644 .global GLOBAL(umoddi3)
1650 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1654 sub r63,r22,r20 // r63 == 64 % 64
1656 pta LOCAL(large_divisor),tr0
1662 bgt/u r9,r63,tr0 // large_divisor
1671 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1672 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1673 the case may be, %0000000000000000 000.11111111111, still */
1674 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1679 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1681 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1686 /* bubble */ /* could test r3 here to check for divide by zero. */
1689 mcmpgt.l r21,r63,r21 // See Note 1
1691 mshfhi.l r63,r21,r21
1695 /* small divisor: need a third divide step */
1698 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1708 LOCAL(large_divisor):
1717 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1718 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1719 the case may be, %0000000000000000 000.11111111111, still */
1720 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1725 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1727 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1731 pta LOCAL(no_lo_adj),tr0
1738 bgtu/u r7,r25,tr0 // no_lo_adj
1744 /* large_divisor: only needs a few adjustments. */
1753 /* Note 1: To shift the result of the second divide stage so that the result
1754 always fits into 32 bits, yet we still reduce the rest sufficiently
1755 would require a lot of instructions to do the shifts just right. Using
1756 the full 64 bit shift result to multiply with the divisor would require
1757 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1758 Fortunately, if the upper 32 bits of the shift result are non-zero, we
1759 know that the rest after taking this partial result into account will
1760 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1761 upper 32 bits of the partial result are non-zero. */
1762 #endif /* __SHMEDIA__ */
1763 #endif /* L_umoddi3 */
1768 .section .text..SHmedia32,"ax"
1770 .global GLOBAL(moddi3)
1772 pta GLOBAL(umoddi3),tr0
1784 #endif /* __SHMEDIA__ */
1785 #endif /* L_moddi3 */
1788 #if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1792 .global GLOBAL(set_fpscr)
1795 mov.l LOCAL(set_fpscr_L1),r1
1801 #if defined(__SH4__)
1804 #else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1813 #if defined(__SH4__)
1817 #else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1823 LOCAL(set_fpscr_L1):
1824 .long GLOBAL(fpscr_values)
1826 .comm GLOBAL(fpscr_values),8,4
1828 .comm GLOBAL(fpscr_values),8
1830 #endif /* SH3E / SH4 */
1831 #endif /* L_set_fpscr */
1832 #ifdef L_ic_invalidate
1835 .section .text..SHmedia32,"ax"
1837 .global GLOBAL(init_trampoline)
1838 GLOBAL(init_trampoline):
1840 #ifdef __LITTLE_ENDIAN__
1846 movi 0xffffffffffffd002,r20
1853 .global GLOBAL(ic_invalidate)
1854 GLOBAL(ic_invalidate):
1861 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
1862 .global GLOBAL(ic_invalidate)
1863 GLOBAL(ic_invalidate):
1867 /* Compute how many cache lines 0f is away from r4. */
1870 /* Prepare to branch to 0f plus the cache-line offset. */
1877 /* This must be aligned to the beginning of a cache line. */
1879 .rept 256 /* There are 256 cache lines of 32 bytes. */
1886 #endif /* L_ic_invalidate */
1888 #if defined (__SH5__) && __SH5__ == 32
1889 #ifdef L_shcompact_call_trampoline
/* Handler table for GCC_shcompact_call_trampoline: 33 entries of 16
bits each. Every entry is the offset of one handler label from
LOCAL(ct_main_label); the subtraction is written against the datalabel
form of that label. Entries are grouped per destination register
r2..r9 (copy from an FP register, load from a memory address, pop from
the stack), followed by the pop-sequence, wide-return and plain-call
handlers.
NOTE(review): the trampoline builds its base pointer as
ct_main_table - 31 * 2, so the index it applies is presumably biased by
31 entries -- confirm against the loop code, which is not part of this
excerpt, before reordering or inserting entries. */
1892 LOCAL(ct_main_table):
1893 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
1894 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
1895 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
1896 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
1897 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
1898 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
1899 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
1900 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
1901 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
1902 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
1903 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
1904 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
1905 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
1906 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
1907 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
1908 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
1909 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
1910 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
1911 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
1912 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
1913 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
1914 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
1915 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
1916 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
1917 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
1918 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
1919 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
1920 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
1921 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
1922 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
1923 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
1924 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
1925 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
1927 .section .text..SHmedia32, "ax"
1930 /* This function loads 64-bit general-purpose registers from the
1931 stack, from a memory address contained in them or from an FP
1932 register, according to a cookie passed in r1. Its execution
1933 time is linear on the number of registers that actually have
1934 to be copied. See sh.h for details on the actual bit pattern.
1936 The function to be called is passed in r0. If a 32-bit return
1937 value is expected, the actual function will be tail-called,
1938 otherwise the return address will be stored in r10 (that the
1939 caller should expect to be clobbered) and the return value
1940 will be expanded into r2/r3 upon return. */
1942 .global GLOBAL(GCC_shcompact_call_trampoline)
1943 GLOBAL(GCC_shcompact_call_trampoline):
1944 ptabs/l r0, tr0 /* Prepare to call the actual function. */
1945 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
1946 pt/l LOCAL(ct_loop), tr1
1948 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
1953 LOCAL(ct_main_label):
1956 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
1957 /* It must be dr0, so just do it. */
1963 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
1964 /* It is either dr0 or dr2. */
1973 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
1974 shlri r1, 23 - 3, r34
1975 andi r34, 3 << 3, r33
1976 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
1977 LOCAL(ct_r4_fp_base):
1983 LOCAL(ct_r4_fp_copy):
1990 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
1991 shlri r1, 20 - 3, r34
1992 andi r34, 3 << 3, r33
1993 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
1994 LOCAL(ct_r5_fp_base):
2000 LOCAL(ct_r5_fp_copy):
2009 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2010 /* It must be dr8. */
2016 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2017 shlri r1, 16 - 3, r34
2018 andi r34, 3 << 3, r33
2019 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2020 LOCAL(ct_r6_fp_base):
2026 LOCAL(ct_r6_fp_copy):
2035 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2036 /* It is either dr8 or dr10. */
2044 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2045 shlri r1, 12 - 3, r34
2046 andi r34, 3 << 3, r33
2047 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2048 LOCAL(ct_r7_fp_base):
2053 LOCAL(ct_r7_fp_copy):
2062 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2063 /* It is either dr8 or dr10. */
2065 andi r1, 1 << 8, r32
2071 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2072 shlri r1, 8 - 3, r34
2073 andi r34, 3 << 3, r33
2074 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2075 LOCAL(ct_r8_fp_base):
2080 LOCAL(ct_r8_fp_copy):
2089 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2090 /* It is either dr8 or dr10. */
2092 andi r1, 1 << 4, r32
2098 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2099 shlri r1, 4 - 3, r34
2100 andi r34, 3 << 3, r33
2101 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2102 LOCAL(ct_r9_fp_base):
2107 LOCAL(ct_r9_fp_copy):
2116 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2117 pt/l LOCAL(ct_r2_load), tr2
2126 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2127 pt/l LOCAL(ct_r3_load), tr2
2135 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2136 pt/l LOCAL(ct_r4_load), tr2
2144 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2145 pt/l LOCAL(ct_r5_load), tr2
2153 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2154 pt/l LOCAL(ct_r6_load), tr2
2161 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2162 pt/l LOCAL(ct_r7_load), tr2
2169 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2170 pt/l LOCAL(ct_r8_load), tr2
2177 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2178 pt/l LOCAL(ct_check_tramp), tr2
2202 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2209 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2216 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2223 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2230 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2237 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2243 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2249 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2250 andi r1, 7 << 1, r30
2251 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2253 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2257 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2270 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2273 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2274 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2275 pt/u LOCAL(ct_ret_wide), tr2
2278 LOCAL(ct_call_func): /* Just branch to the function. */
2280 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2281 64-bit return value. */
2285 #if __LITTLE_ENDIAN__
2293 #endif /* L_shcompact_call_trampoline */
2295 #ifdef L_shcompact_return_trampoline
2296 /* This function does the converse of the code in `ret_wide'
2297 above. It is tail-called by SHcompact functions returning
2298 64-bit non-floating-point values, to pack the 32-bit values in
2299 r2 and r3 into r2. */
2302 .section .text..SHmedia32, "ax"
2304 .global GLOBAL(GCC_shcompact_return_trampoline)
2305 GLOBAL(GCC_shcompact_return_trampoline):
2307 #if __LITTLE_ENDIAN__
2316 #endif /* L_shcompact_return_trampoline */
2318 #ifdef L_shcompact_incoming_args
/* Handler table for GCC_shcompact_incoming_args: 33 entries of 16 bits
each. A valid entry is the offset of one handler label from
LOCAL(ia_main_label); the subtraction is written against the datalabel
form of that label. Slots that have no handler hold the constant 1 and
are marked "Invalid, just loop". Valid entries cover, per register
r2..r9, a store-and-load-address handler and a push handler, followed
by the push-sequence and return handlers.
NOTE(review): the entry code builds its base pointer as
ia_main_table - 31 * 2, so the index it applies is presumably biased by
31 entries -- confirm against the loop code, which is not part of this
excerpt, before reordering or inserting entries. */
2321 LOCAL(ia_main_table):
2322 .word 1 /* Invalid, just loop */
2323 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2324 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2325 .word 1 /* Invalid, just loop */
2326 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2327 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2328 .word 1 /* Invalid, just loop */
2329 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2330 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2331 .word 1 /* Invalid, just loop */
2332 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2333 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2334 .word 1 /* Invalid, just loop */
2335 .word 1 /* Invalid, just loop */
2336 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2337 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2338 .word 1 /* Invalid, just loop */
2339 .word 1 /* Invalid, just loop */
2340 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2341 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2342 .word 1 /* Invalid, just loop */
2343 .word 1 /* Invalid, just loop */
2344 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2345 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2346 .word 1 /* Invalid, just loop */
2347 .word 1 /* Invalid, just loop */
2348 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2349 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2350 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2351 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2352 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2353 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2354 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2356 .section .text..SHmedia32, "ax"
2359 /* This function stores 64-bit general-purpose registers back in
2360 the stack, and loads the address in which each register
2361 was stored into itself. The lower 32 bits of r17 hold the address
2362 to begin storing, and the upper 32 bits of r17 hold the cookie.
2363 Its execution time is linear on the
2364 number of registers that actually have to be copied, and it is
2365 optimized for structures larger than 64 bits, as opposed to
2366 individual `long long' arguments. See sh.h for details on the
2367 actual bit pattern. */
2369 .global GLOBAL(GCC_shcompact_incoming_args)
2370 GLOBAL(GCC_shcompact_incoming_args):
2371 ptabs/l r18, tr0 /* Prepare to return. */
2372 shlri r17, 32, r0 /* Load the cookie. */
2373 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2374 pt/l LOCAL(ia_loop), tr1
2376 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
2381 LOCAL(ia_main_label):
2384 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2393 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2402 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2411 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2420 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2429 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2437 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2445 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2449 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2456 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2463 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2470 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2477 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2484 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2490 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2496 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2497 andi r0, 7 << 1, r38
2498 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2500 shori LOCAL(ia_end_of_push_seq) & 65535, r40
2504 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2517 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2519 LOCAL(ia_return): /* Return. */
2521 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2522 #endif /* L_shcompact_incoming_args */
2525 #ifdef L_nested_trampoline
2527 .section .text..SHmedia32,"ax"
2531 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2532 .global GLOBAL(GCC_nested_trampoline)
2533 GLOBAL(GCC_nested_trampoline):
2549 #endif /* L_nested_trampoline */
2550 #endif /* __SH5__ */
2552 #ifdef L_push_pop_shmedia_regs
2553 .section .text..SHmedia32,"ax"
2556 #ifndef __SH4_NOFPU__
2557 .global GLOBAL(GCC_push_shmedia_regs)
2558 GLOBAL(GCC_push_shmedia_regs):
2559 addi.l r15, -14*8, r15
2560 fst.d r15, 13*8, dr62
2561 fst.d r15, 12*8, dr60
2562 fst.d r15, 11*8, dr58
2563 fst.d r15, 10*8, dr56
2564 fst.d r15, 9*8, dr54
2565 fst.d r15, 8*8, dr52
2566 fst.d r15, 7*8, dr50
2567 fst.d r15, 6*8, dr48
2568 fst.d r15, 5*8, dr46
2569 fst.d r15, 4*8, dr44
2570 fst.d r15, 3*8, dr42
2571 fst.d r15, 2*8, dr40
2572 fst.d r15, 1*8, dr38
2573 fst.d r15, 0*8, dr36
2575 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2576 GLOBAL(GCC_push_shmedia_regs_nofpu):
2578 addi.l r15, -27*8, r15
2611 #ifndef __SH4_NOFPU__
2612 .global GLOBAL(GCC_pop_shmedia_regs)
2613 GLOBAL(GCC_pop_shmedia_regs):
2616 fld.d r15, 40*8, dr62
2617 fld.d r15, 39*8, dr60
2618 fld.d r15, 38*8, dr58
2619 fld.d r15, 37*8, dr56
2620 fld.d r15, 36*8, dr54
2621 fld.d r15, 35*8, dr52
2622 fld.d r15, 34*8, dr50
2623 fld.d r15, 33*8, dr48
2624 fld.d r15, 32*8, dr46
2625 fld.d r15, 31*8, dr44
2626 fld.d r15, 30*8, dr42
2627 fld.d r15, 29*8, dr40
2628 fld.d r15, 28*8, dr38
2629 fld.d r15, 27*8, dr36
2632 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2633 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2669 #endif /* __SH5__ == 32 */
2670 #endif /* L_push_pop_shmedia_regs */