1 /* Copyright
(C
) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3 Free Software Foundation
, Inc.
5 This file is free software
; you can redistribute it and/or modify it
6 under the terms of the GNU General
Public License as published by the
7 Free Software Foundation
; either version 2, or (at your option) any
10 In addition to the permissions
in the GNU General
Public License
, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of
this file
into combinations with other programs
,
13 and to distribute those combinations without any restriction coming
14 from the use of
this file.
(The General
Public License restrictions
15 do apply
in other respects
; for example, they cover modification of
16 the file
, and distribution when
not linked
into a combine
19 This file is distributed
in the hope that it will be useful
, but
20 WITHOUT ANY WARRANTY
; without even the implied warranty of
21 MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General
Public License for more details.
24 You should have received a copy of the GNU General
Public License
25 along with
this program
; see the file COPYING. If not, write to
26 the Free Software Foundation
, 51 Franklin Street
, Fifth Floor
,
27 Boston
, MA
02110-1301, USA.
*/
29 !! libgcc routines for the Renesas
/ SuperH SH CPUs.
30 !! Contributed by Steve Chamberlain.
33 !! ashiftrt_r4_x
, ___ashrsi3
, ___ashlsi3
, ___lshrsi3 routines
34 !! recoded
in assembly by Toshiyasu Morita
37 /* SH2 optimizations for ___ashrsi3
, ___ashlsi3
, ___lshrsi3
and
38 ELF
local label prefixes by J
"orn Rennecke
/* Helper macros, expanded by the C preprocessor before assembly.
   NOTE(review): LOCAL, HIDDEN_FUNC and HIDDEN_ALIAS are each defined twice
   below.  The conditional selecting between the two sets is not visible in
   this excerpt -- presumably an ELF / non-ELF split; confirm against the
   full file.  */
/* First set: file-local labels are spelled with a ".L_" prefix.  */
42 #define LOCAL(X) .L_##X
/* Give symbol X the type "function".  */
43 #define FUNC(X) .type X,@function
/* FUNC plus .hidden visibility for X.  */
44 #define HIDDEN_FUNC(X) FUNC(X); .hidden X
/* Create alias GLOBAL(X) for GLOBAL(Y) via ALIAS and mark the alias hidden.  */
45 #define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X)
/* Define an end label .Lfe_<X> at the current location and set the size of
   X to the distance from X to that label.  */
46 #define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
/* Extra level of indirection so that an argument such as GLOBAL(foo) is
   macro-expanded before ENDFUNC0 pastes it into the label name.  */
47 #define ENDFUNC(X) ENDFUNC0(X)
/* Second set: local labels use a plain "L_" prefix, HIDDEN_FUNC expands to
   nothing, and HIDDEN_ALIAS is a plain ALIAS with no visibility directive.  */
49 #define LOCAL(X) L_##X
51 #define HIDDEN_FUNC(X)
52 #define HIDDEN_ALIAS(X,Y) ALIAS (X,Y)
/* Token-paste A and B.  GLOBAL0 goes through CONCAT so that its U argument
   is expanded before the paste.  */
56 #define CONCAT(A,B) A##B
57 #define GLOBAL0(U,X) CONCAT(U,__##X)
/* GLOBAL(X) is the external symbol name for routine X: the value of
   __USER_LABEL_PREFIX__ followed by "__" and X.  */
58 #define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)
/* Export GLOBAL(X) and define it, with .set, as GLOBAL(Y).  */
60 #define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y)
/* Export the 33 entry points ashiftrt_r4_0 .. ashiftrt_r4_32, one symbol per
   value of the numeric suffix.  Each name goes through GLOBAL(), so the
   exported symbol carries the user label prefix and a leading "__".  */
69 .global GLOBAL(ashiftrt_r4_0)
70 .global GLOBAL(ashiftrt_r4_1)
71 .global GLOBAL(ashiftrt_r4_2)
72 .global GLOBAL(ashiftrt_r4_3)
73 .global GLOBAL(ashiftrt_r4_4)
74 .global GLOBAL(ashiftrt_r4_5)
75 .global GLOBAL(ashiftrt_r4_6)
76 .global GLOBAL(ashiftrt_r4_7)
77 .global GLOBAL(ashiftrt_r4_8)
78 .global GLOBAL(ashiftrt_r4_9)
79 .global GLOBAL(ashiftrt_r4_10)
80 .global GLOBAL(ashiftrt_r4_11)
81 .global GLOBAL(ashiftrt_r4_12)
82 .global GLOBAL(ashiftrt_r4_13)
83 .global GLOBAL(ashiftrt_r4_14)
84 .global GLOBAL(ashiftrt_r4_15)
85 .global GLOBAL(ashiftrt_r4_16)
86 .global GLOBAL(ashiftrt_r4_17)
87 .global GLOBAL(ashiftrt_r4_18)
88 .global GLOBAL(ashiftrt_r4_19)
89 .global GLOBAL(ashiftrt_r4_20)
90 .global GLOBAL(ashiftrt_r4_21)
91 .global GLOBAL(ashiftrt_r4_22)
92 .global GLOBAL(ashiftrt_r4_23)
93 .global GLOBAL(ashiftrt_r4_24)
94 .global GLOBAL(ashiftrt_r4_25)
95 .global GLOBAL(ashiftrt_r4_26)
96 .global GLOBAL(ashiftrt_r4_27)
97 .global GLOBAL(ashiftrt_r4_28)
98 .global GLOBAL(ashiftrt_r4_29)
99 .global GLOBAL(ashiftrt_r4_30)
100 .global GLOBAL(ashiftrt_r4_31)
101 .global GLOBAL(ashiftrt_r4_32)
/* Apply HIDDEN_FUNC to every ashiftrt_r4_N entry point (N = 0 .. 32).
   Depending on which HIDDEN_FUNC definition is active, this either emits
   ".type <sym>,@function; .hidden <sym>" for each symbol or expands to
   nothing.  */
103 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
104 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
105 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
106 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
107 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
108 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
109 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
110 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
111 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
112 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
113 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
114 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
115 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
116 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
117 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
118 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
119 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
120 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
121 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
122 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
123 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
124 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
125 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
126 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
127 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
128 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
129 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
130 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
131 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
132 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
133 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
134 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
135 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
138 GLOBAL(ashiftrt_r4_32):
139 GLOBAL(ashiftrt_r4_31):
144 GLOBAL(ashiftrt_r4_30):
146 GLOBAL(ashiftrt_r4_29):
148 GLOBAL(ashiftrt_r4_28):
150 GLOBAL(ashiftrt_r4_27):
152 GLOBAL(ashiftrt_r4_26):
154 GLOBAL(ashiftrt_r4_25):
156 GLOBAL(ashiftrt_r4_24):
162 GLOBAL(ashiftrt_r4_23):
164 GLOBAL(ashiftrt_r4_22):
166 GLOBAL(ashiftrt_r4_21):
168 GLOBAL(ashiftrt_r4_20):
170 GLOBAL(ashiftrt_r4_19):
172 GLOBAL(ashiftrt_r4_18):
174 GLOBAL(ashiftrt_r4_17):
176 GLOBAL(ashiftrt_r4_16):
181 GLOBAL(ashiftrt_r4_15):
183 GLOBAL(ashiftrt_r4_14):
185 GLOBAL(ashiftrt_r4_13):
187 GLOBAL(ashiftrt_r4_12):
189 GLOBAL(ashiftrt_r4_11):
191 GLOBAL(ashiftrt_r4_10):
193 GLOBAL(ashiftrt_r4_9):
195 GLOBAL(ashiftrt_r4_8):
197 GLOBAL(ashiftrt_r4_7):
199 GLOBAL(ashiftrt_r4_6):
201 GLOBAL(ashiftrt_r4_5):
203 GLOBAL(ashiftrt_r4_4):
205 GLOBAL(ashiftrt_r4_3):
207 GLOBAL(ashiftrt_r4_2):
209 GLOBAL(ashiftrt_r4_1):
213 GLOBAL(ashiftrt_r4_0):
/* Close every ashiftrt_r4_N entry point (N = 0 .. 32).  With the ENDFUNC0
   definition shown earlier in this file, each line defines an end label
   .Lfe_<sym> here and sets the size of <sym> to the distance from its own
   entry label to that end label.  Because the ENDFUNC lines are
   consecutive, all of the end labels mark the same address.  */
217 ENDFUNC(GLOBAL(ashiftrt_r4_0))
218 ENDFUNC(GLOBAL(ashiftrt_r4_1))
219 ENDFUNC(GLOBAL(ashiftrt_r4_2))
220 ENDFUNC(GLOBAL(ashiftrt_r4_3))
221 ENDFUNC(GLOBAL(ashiftrt_r4_4))
222 ENDFUNC(GLOBAL(ashiftrt_r4_5))
223 ENDFUNC(GLOBAL(ashiftrt_r4_6))
224 ENDFUNC(GLOBAL(ashiftrt_r4_7))
225 ENDFUNC(GLOBAL(ashiftrt_r4_8))
226 ENDFUNC(GLOBAL(ashiftrt_r4_9))
227 ENDFUNC(GLOBAL(ashiftrt_r4_10))
228 ENDFUNC(GLOBAL(ashiftrt_r4_11))
229 ENDFUNC(GLOBAL(ashiftrt_r4_12))
230 ENDFUNC(GLOBAL(ashiftrt_r4_13))
231 ENDFUNC(GLOBAL(ashiftrt_r4_14))
232 ENDFUNC(GLOBAL(ashiftrt_r4_15))
233 ENDFUNC(GLOBAL(ashiftrt_r4_16))
234 ENDFUNC(GLOBAL(ashiftrt_r4_17))
235 ENDFUNC(GLOBAL(ashiftrt_r4_18))
236 ENDFUNC(GLOBAL(ashiftrt_r4_19))
237 ENDFUNC(GLOBAL(ashiftrt_r4_20))
238 ENDFUNC(GLOBAL(ashiftrt_r4_21))
239 ENDFUNC(GLOBAL(ashiftrt_r4_22))
240 ENDFUNC(GLOBAL(ashiftrt_r4_23))
241 ENDFUNC(GLOBAL(ashiftrt_r4_24))
242 ENDFUNC(GLOBAL(ashiftrt_r4_25))
243 ENDFUNC(GLOBAL(ashiftrt_r4_26))
244 ENDFUNC(GLOBAL(ashiftrt_r4_27))
245 ENDFUNC(GLOBAL(ashiftrt_r4_28))
246 ENDFUNC(GLOBAL(ashiftrt_r4_29))
247 ENDFUNC(GLOBAL(ashiftrt_r4_30))
248 ENDFUNC(GLOBAL(ashiftrt_r4_31))
249 ENDFUNC(GLOBAL(ashiftrt_r4_32))
271 .global GLOBAL(ashrsi3)
272 HIDDEN_FUNC(GLOBAL(ashrsi3))
277 mova LOCAL(ashrsi3_table),r0
/* Offset table for ashrsi3: 32 one-byte entries, entry N holding the
   distance from this table's label to the local label ashrsi3_N
   (N = 0 .. 31).  The table address is loaded into r0 with mova near the
   function entry.  Each target must be close enough to the table for the
   difference to fit in a single byte.
   NOTE(review): the code that indexes the table is not visible in this
   excerpt; presumably the index is the shift count -- confirm.  */
288 LOCAL(ashrsi3_table):
289 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
300 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
301 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
302 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
303 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
304 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
305 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
306 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
307 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
308 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
309 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
310 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
311 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
312 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
313 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
314 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
315 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
316 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
317 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
318 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
319 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
320 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
400 ENDFUNC(GLOBAL(ashrsi3))
421 .global GLOBAL(ashlsi3)
422 HIDDEN_FUNC(GLOBAL(ashlsi3))
427 mova LOCAL(ashlsi3_table),r0
/* Offset table for ashlsi3: 32 one-byte entries, entry N holding the
   distance from this table's label to the local label ashlsi3_N
   (N = 0 .. 31).  The table address is loaded into r0 with mova near the
   function entry.  Each target must be close enough to the table for the
   difference to fit in a single byte.
   NOTE(review): the code that indexes the table is not visible in this
   excerpt; presumably the index is the shift count -- confirm.  */
438 LOCAL(ashlsi3_table):
439 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
440 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
441 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
442 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
443 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
444 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
445 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
446 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
447 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
448 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
449 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
450 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
451 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
452 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
453 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
454 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
455 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
456 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
457 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
458 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
459 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
460 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
461 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
462 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
463 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
464 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
465 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
466 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
467 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
468 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
469 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
470 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
559 ENDFUNC(GLOBAL(ashlsi3))
580 .global GLOBAL(lshrsi3)
581 HIDDEN_FUNC(GLOBAL(lshrsi3))
586 mova LOCAL(lshrsi3_table),r0
/* Offset table for lshrsi3: 32 one-byte entries, entry N holding the
   distance from this table's label to the local label lshrsi3_N
   (N = 0 .. 31).  The table address is loaded into r0 with mova near the
   function entry.  Each target must be close enough to the table for the
   difference to fit in a single byte.
   NOTE(review): the code that indexes the table is not visible in this
   excerpt; presumably the index is the shift count -- confirm.  */
597 LOCAL(lshrsi3_table):
598 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
599 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
600 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
601 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
602 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
603 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
604 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
605 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
606 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
607 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
608 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
609 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
610 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
611 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
612 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
613 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
614 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
615 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
616 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
617 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
618 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
619 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
620 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
621 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
622 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
623 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
624 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
625 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
626 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
627 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
628 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
629 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
718 ENDFUNC(GLOBAL(lshrsi3))
724 .global GLOBAL(movmem)
725 HIDDEN_FUNC(GLOBAL(movmem))
726 HIDDEN_ALIAS(movstr,movmem)
727 /* This would be a lot simpler if r6 contained the byte count
728 minus 64, and we wouldn't be called here for a byte count of 64. */
732 bsr GLOBAL(movmemSI52+2)
735 LOCAL(movmem_loop): /* Reached with rts */
741 bt LOCAL(movmem_done)
748 bt GLOBAL(movmemSI52)
749 ! done all the large groups, do the remainder
751 mova GLOBAL(movmemSI4)+4,r0
754 LOCAL(movmem_done): ! share slot insn, works out aligned.
761 ! ??? We need aliases movstr* for movmem* for the older libraries. These
762 ! aliases will be removed at some point in the future.
763 .global GLOBAL(movmemSI64)
764 HIDDEN_FUNC(GLOBAL(movmemSI64))
765 HIDDEN_ALIAS(movstrSI64,movmemSI64)
769 .global GLOBAL(movmemSI60)
770 HIDDEN_FUNC(GLOBAL(movmemSI60))
771 HIDDEN_ALIAS(movstrSI60,movmemSI60)
775 .global GLOBAL(movmemSI56)
776 HIDDEN_FUNC(GLOBAL(movmemSI56))
777 HIDDEN_ALIAS(movstrSI56,movmemSI56)
781 .global GLOBAL(movmemSI52)
782 HIDDEN_FUNC(GLOBAL(movmemSI52))
783 HIDDEN_ALIAS(movstrSI52,movmemSI52)
787 .global GLOBAL(movmemSI48)
788 HIDDEN_FUNC(GLOBAL(movmemSI48))
789 HIDDEN_ALIAS(movstrSI48,movmemSI48)
793 .global GLOBAL(movmemSI44)
794 HIDDEN_FUNC(GLOBAL(movmemSI44))
795 HIDDEN_ALIAS(movstrSI44,movmemSI44)
799 .global GLOBAL(movmemSI40)
800 HIDDEN_FUNC(GLOBAL(movmemSI40))
801 HIDDEN_ALIAS(movstrSI40,movmemSI40)
805 .global GLOBAL(movmemSI36)
806 HIDDEN_FUNC(GLOBAL(movmemSI36))
807 HIDDEN_ALIAS(movstrSI36,movmemSI36)
811 .global GLOBAL(movmemSI32)
812 HIDDEN_FUNC(GLOBAL(movmemSI32))
813 HIDDEN_ALIAS(movstrSI32,movmemSI32)
817 .global GLOBAL(movmemSI28)
818 HIDDEN_FUNC(GLOBAL(movmemSI28))
819 HIDDEN_ALIAS(movstrSI28,movmemSI28)
823 .global GLOBAL(movmemSI24)
824 HIDDEN_FUNC(GLOBAL(movmemSI24))
825 HIDDEN_ALIAS(movstrSI24,movmemSI24)
829 .global GLOBAL(movmemSI20)
830 HIDDEN_FUNC(GLOBAL(movmemSI20))
831 HIDDEN_ALIAS(movstrSI20,movmemSI20)
835 .global GLOBAL(movmemSI16)
836 HIDDEN_FUNC(GLOBAL(movmemSI16))
837 HIDDEN_ALIAS(movstrSI16,movmemSI16)
841 .global GLOBAL(movmemSI12)
842 HIDDEN_FUNC(GLOBAL(movmemSI12))
843 HIDDEN_ALIAS(movstrSI12,movmemSI12)
847 .global GLOBAL(movmemSI8)
848 HIDDEN_FUNC(GLOBAL(movmemSI8))
849 HIDDEN_ALIAS(movstrSI8,movmemSI8)
853 .global GLOBAL(movmemSI4)
854 HIDDEN_FUNC(GLOBAL(movmemSI4))
855 HIDDEN_ALIAS(movstrSI4,movmemSI4)
/* Close the movmem family: one ENDFUNC for each size-specific entry point
   (movmemSI64 down to movmemSI4 in steps of 4), then one for movmem itself.
   With the ENDFUNC0 definition shown earlier in this file, every symbol's
   size is measured from its own entry label to this common end point.  */
861 ENDFUNC(GLOBAL(movmemSI64))
862 ENDFUNC(GLOBAL(movmemSI60))
863 ENDFUNC(GLOBAL(movmemSI56))
864 ENDFUNC(GLOBAL(movmemSI52))
865 ENDFUNC(GLOBAL(movmemSI48))
866 ENDFUNC(GLOBAL(movmemSI44))
867 ENDFUNC(GLOBAL(movmemSI40))
868 ENDFUNC(GLOBAL(movmemSI36))
869 ENDFUNC(GLOBAL(movmemSI32))
870 ENDFUNC(GLOBAL(movmemSI28))
871 ENDFUNC(GLOBAL(movmemSI24))
872 ENDFUNC(GLOBAL(movmemSI20))
873 ENDFUNC(GLOBAL(movmemSI16))
874 ENDFUNC(GLOBAL(movmemSI12))
875 ENDFUNC(GLOBAL(movmemSI8))
876 ENDFUNC(GLOBAL(movmemSI4))
877 ENDFUNC(GLOBAL(movmem))
882 .global GLOBAL(movmem_i4_even)
883 .global GLOBAL(movmem_i4_odd)
884 .global GLOBAL(movmemSI12_i4)
886 HIDDEN_FUNC(GLOBAL(movmem_i4_even))
887 HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
888 HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
890 HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
891 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
892 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
902 GLOBAL(movmem_i4_even):
904 bra L_movmem_start_even
907 GLOBAL(movmem_i4_odd):
919 bt/s L_movmem_2mod4_end
933 ENDFUNC(GLOBAL(movmem_i4_even))
934 ENDFUNC(GLOBAL(movmem_i4_odd))
937 GLOBAL(movmemSI12_i4):
946 ENDFUNC(GLOBAL(movmemSI12_i4))
952 .global GLOBAL(mulsi3)
953 HIDDEN_FUNC(GLOBAL(mulsi3))
957 ! r0 = aabb*ccdd via partial products
959 ! if aa == 0 and cc == 0
963 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
967 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
968 mov r5,r3 ! r3 = ccdd
969 swap.w r4,r2 ! r2 = bbaa
970 xtrct r2,r3 ! r3 = aacc
971 tst r3,r3 ! msws zero ?
973 rts ! yes - then we have the answer
976 hiset: sts macl,r0 ! r0 = bb*dd
977 mulu.w r2,r5 ! brewing macl = aa*dd
979 mulu.w r3,r4 ! brewing macl = cc*bb
986 ENDFUNC(GLOBAL(mulsi3))
988 #endif /* ! __SH5__ */
991 !! 4 byte integer Divide code for the Renesas SH
993 !! args in r4 and r5, result in fpul, clobber dr0, dr2
995 .global GLOBAL(sdivsi3_i4)
996 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1006 ENDFUNC(GLOBAL(sdivsi3_i4))
1007 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1008 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
1010 #if ! __SH5__ || __SH5__ == 32
1014 .global GLOBAL(sdivsi3_i4)
1015 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1030 ENDFUNC(GLOBAL(sdivsi3_i4))
1031 #endif /* ! __SH5__ || __SH5__ == 32 */
1032 #endif /* ! __SH4__ */
1036 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1038 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1040 !! Steve Chamberlain
1045 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1047 .global GLOBAL(sdivsi3)
1050 .section .text..SHmedia32,"ax"
1056 /* The assembly code that follows is a hand-optimized version of the C
1057 code that follows. Note that the registers that are modified are
1058 exactly those listed as clobbered in the patterns divsi3_i1 and
1061 int __sdivsi3 (i, j)
1064 register unsigned long long r18 asm ("r18
");
1065 register unsigned long long r19 asm ("r19
");
1066 register unsigned long long r0 asm ("r0
") = 0;
1067 register unsigned long long r1 asm ("r1
") = 1;
1068 register int r2 asm ("r2
") = i >> 31;
1069 register int r3 asm ("r3
") = j >> 31;
1081 r0 |= r1, r18 -= r19;
1082 while (r19 >>= 1, r1 >>= 1);
1084 return r2 * (int)r0;
1088 pt/l LOCAL(sdivsi3_dontadd), tr2
1089 pt/l LOCAL(sdivsi3_loop), tr1
1102 LOCAL(sdivsi3_loop):
1106 LOCAL(sdivsi3_dontadd):
1115 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1118 // can create absolute value without extra latency,
1119 // but dependent on proper sign extension of inputs:
1122 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1125 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1126 movi 0xffffffffffffbb0c,r19 // shift count eqiv 76
1134 // If r4 was to be used in-place instead of r21, could use this sequence
1135 // to compute absolute:
1136 // sub r63,r4,r19 // compute absolute value of r4
1137 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1138 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1150 mmacnfx.wl r25,r2,r1
1175 #else /* ! 0 && ! 0 */
1178 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1180 HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1182 FUNC(GLOBAL(sdivsi3))
1183 GLOBAL(sdivsi3): /* this is the shcompact entry point */
1184 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1185 // with the SHcompact implementation, which clobbers tr1 / tr2.
1186 .global GLOBAL(sdivsi3_1)
1188 .global GLOBAL(div_table_internal)
1189 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1190 shori GLOBAL(div_table_internal) & 65535, r20
1192 .global GLOBAL(sdivsi3_2)
1194 // clobbered: r1,r18,r19,r21,r25,tr0
1197 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
1198 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
1199 ldx.ub r20, r21, r19 // u0.8
1200 shari r25, 32, r25 // normalize to s2.30
1202 muls.l r25, r19, r19 // s2.38
1203 ldx.w r20, r21, r21 // s2.14
1205 shari r19, 24, r19 // truncate to s2.14
1206 sub r21, r19, r19 // some 11 bit inverse in s1.14
1207 muls.l r19, r19, r21 // u0.28
1210 muls.l r25, r21, r18 // s2.58
1211 shlli r19, 45, r19 // multiply by two and convert to s2.58
1214 shari r18, 28, r18 // some 22 bit inverse in s1.30
1215 muls.l r18, r25, r0 // s2.60
1216 muls.l r18, r4, r25 // s32.30
1218 shari r0, 16, r19 // s-16.44
1219 muls.l r19, r18, r19 // s-16.74
1221 shari r4, 14, r18 // s19.-14
1222 shari r19, 30, r19 // s-16.44
1223 muls.l r19, r18, r19 // s15.30
1224 xor r21, r0, r21 // You could also use the constant 1 << 27.
1231 ENDFUNC(GLOBAL(sdivsi3))
1233 ENDFUNC(GLOBAL(sdivsi3_2))
1235 #elif defined __SHMEDIA__
1236 /* m5compact-nofpu */
1237 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1239 .section .text..SHmedia32,"ax"
1241 FUNC(GLOBAL(sdivsi3))
1243 pt/l LOCAL(sdivsi3_dontsub), tr0
1244 pt/l LOCAL(sdivsi3_loop), tr1
1256 LOCAL(sdivsi3_loop):
1260 LOCAL(sdivsi3_dontsub):
1266 ENDFUNC(GLOBAL(sdivsi3))
1267 #else /* ! __SHMEDIA__ */
1268 FUNC(GLOBAL(sdivsi3))
1353 ENDFUNC(GLOBAL(sdivsi3))
1354 #endif /* ! __SHMEDIA__ */
1355 #endif /* ! __SH4__ */
1360 !! 4 byte integer Divide code for the Renesas SH
1362 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1365 .global GLOBAL(udivsi3_i4)
1366 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1378 #ifdef __LITTLE_ENDIAN__
1402 .align 3 ! make double below 8 byte aligned.
1407 ENDFUNC(GLOBAL(udivsi3_i4))
1408 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1409 #if ! __SH5__ || __SH5__ == 32
1410 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1412 .global GLOBAL(udivsi3_i4)
1413 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1427 ENDFUNC(GLOBAL(udivsi3_i4))
1428 #endif /* ! __SH5__ || __SH5__ == 32 */
1429 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1430 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1432 .global GLOBAL(udivsi3_i4)
1433 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1447 #ifdef __LITTLE_ENDIAN__
1467 .align 3 ! make double below 8 byte aligned.
1482 ENDFUNC(GLOBAL(udivsi3_i4))
1483 #endif /* ! __SH4__ */
1487 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1489 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1491 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1492 .global GLOBAL(udivsi3)
1493 HIDDEN_FUNC(GLOBAL(udivsi3))
1497 .section .text..SHmedia32,"ax"
1503 /* The assembly code that follows is a hand-optimized version of the C
1504 code that follows. Note that the registers that are modified are
1505 exactly those listed as clobbered in the patterns udivsi3_i1 and
1512 register unsigned long long r0 asm ("r0
") = 0;
1513 register unsigned long long r18 asm ("r18
") = 1;
1514 register unsigned long long r4 asm ("r4
") = i;
1515 register unsigned long long r19 asm ("r19
") = j;
1521 r0 |= r18, r4 -= r19;
1522 while (r19 >>= 1, r18 >>= 1);
1528 pt/l LOCAL(udivsi3_dontadd), tr2
1529 pt/l LOCAL(udivsi3_loop), tr1
1537 LOCAL(udivsi3_loop):
1541 LOCAL(udivsi3_dontadd):
1549 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1555 movi 0xffffffffffffbb0c,r20 // shift count eqiv 76
1557 mmulfx.w r21,r21,r19
1558 mshflo.w r21,r63,r21
1560 mmulfx.w r25,r19,r19
1564 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1565 before the msub.w, but we need a different value for
1566 r19 to keep errors under control. */
1568 mmulfx.w r19,r19,r19
1572 mmacnfx.wl r25,r19,r21
1597 #elif defined (__SHMEDIA__)
1598 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1599 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1600 So use a short shmedia loop. */
1601 // clobbered: r20,r21,r25,tr0,tr1,tr2
1603 .section .text..SHmedia32,"ax"
1606 pt/l LOCAL(udivsi3_dontsub), tr0
1607 pt/l LOCAL(udivsi3_loop), tr1
1612 LOCAL(udivsi3_loop):
1616 LOCAL(udivsi3_dontsub):
1621 #else /* ! defined (__SHMEDIA__) */
1625 div1 r5,r4; div1 r5,r4; div1 r5,r4
1626 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1629 div1 r5,r4; rotcl r0
1630 div1 r5,r4; rotcl r0
1631 div1 r5,r4; rotcl r0
1639 bf LOCAL(large_divisor)
1641 bf/s LOCAL(large_divisor)
1663 LOCAL(large_divisor):
1682 ENDFUNC(GLOBAL(udivsi3))
1683 #endif /* ! __SHMEDIA__ */
1684 #endif /* __SH4__ */
1685 #endif /* L_udivsi3 */
1690 .section .text..SHmedia32,"ax"
1692 .global GLOBAL(udivdi3)
1693 FUNC(GLOBAL(udivdi3))
1695 HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1700 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1704 sub r63,r22,r20 // r63 == 64 % 64
1706 pta LOCAL(large_divisor),tr0
1712 bgt/u r9,r63,tr0 // large_divisor
1721 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1722 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1723 the case may be, %0000000000000000 000.11111111111, still */
1724 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1729 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1731 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1740 mcmpgt.l r21,r63,r21 // See Note 1
1742 mshfhi.l r63,r21,r21
1746 /* small divisor: need a third divide step */
1756 /* could test r3 here to check for divide by zero. */
1759 LOCAL(large_divisor):
1768 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1769 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1770 the case may be, %0000000000000000 000.11111111111, still */
1771 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1776 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1778 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1782 pta LOCAL(no_lo_adj),tr0
1789 bgtu/u r7,r25,tr0 // no_lo_adj
1795 /* large_divisor: only needs a few adjustments. */
1802 ENDFUNC(GLOBAL(udivdi3))
1803 /* Note 1: To shift the result of the second divide stage so that the result
1804 always fits into 32 bits, yet we still reduce the rest sufficiently
1805 would require a lot of instructions to do the shifts just right. Using
1806 the full 64 bit shift result to multiply with the divisor would require
1807 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1808 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1809 know that the rest after taking this partial result into account will
1810 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1811 upper 32 bits of the partial result are nonzero. */
1812 #endif /* __SHMEDIA__ */
1813 #endif /* L_udivdi3 */
1818 .section .text..SHmedia32,"ax"
1820 .global GLOBAL(divdi3)
1821 FUNC(GLOBAL(divdi3))
1823 pta GLOBAL(udivdi3_internal),tr0
1835 ENDFUNC(GLOBAL(divdi3))
1836 #endif /* __SHMEDIA__ */
1837 #endif /* L_divdi3 */
1842 .section .text..SHmedia32,"ax"
1844 .global GLOBAL(umoddi3)
1845 FUNC(GLOBAL(umoddi3))
1847 HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1852 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1856 sub r63,r22,r20 // r63 == 64 % 64
1858 pta LOCAL(large_divisor),tr0
1864 bgt/u r9,r63,tr0 // large_divisor
1873 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1874 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1875 the case may be, %0000000000000000 000.11111111111, still */
1876 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1881 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1883 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1888 /* bubble */ /* could test r3 here to check for divide by zero. */
1891 mcmpgt.l r21,r63,r21 // See Note 1
1893 mshfhi.l r63,r21,r21
1897 /* small divisor: need a third divide step */
1900 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1910 LOCAL(large_divisor):
1919 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1920 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1921 the case may be, %0000000000000000 000.11111111111, still */
1922 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1927 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1929 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1933 pta LOCAL(no_lo_adj),tr0
1940 bgtu/u r7,r25,tr0 // no_lo_adj
1946 /* large_divisor: only needs a few adjustments. */
1955 ENDFUNC(GLOBAL(umoddi3))
1956 /* Note 1: To shift the result of the second divide stage so that the result
1957 always fits into 32 bits, yet we still reduce the rest sufficiently
1958 would require a lot of instructions to do the shifts just right. Using
1959 the full 64 bit shift result to multiply with the divisor would require
1960 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1961 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1962 know that the rest after taking this partial result into account will
1963 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1964 upper 32 bits of the partial result are nonzero. */
1965 #endif /* __SHMEDIA__ */
1966 #endif /* L_umoddi3 */
1971 .section .text..SHmedia32,"ax"
1973 .global GLOBAL(moddi3)
1974 FUNC(GLOBAL(moddi3))
1976 pta GLOBAL(umoddi3_internal),tr0
1988 ENDFUNC(GLOBAL(moddi3))
1989 #endif /* __SHMEDIA__ */
1990 #endif /* L_moddi3 */
1993 #if !defined (__SH2A_NOFPU__)
1994 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1998 .global GLOBAL(set_fpscr)
1999 HIDDEN_FUNC(GLOBAL(set_fpscr))
2004 mova LOCAL(set_fpscr_L0),r0
2005 mov.l LOCAL(set_fpscr_L0),r12
2007 mov.l LOCAL(set_fpscr_L1),r0
2011 mov.l LOCAL(set_fpscr_L1),r1
2018 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2021 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2030 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2034 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2041 LOCAL(set_fpscr_L0):
2042 .long _GLOBAL_OFFSET_TABLE_
2043 LOCAL(set_fpscr_L1):
2044 .long GLOBAL(fpscr_values@GOT)
2046 LOCAL(set_fpscr_L1):
2047 .long GLOBAL(fpscr_values)
2050 ENDFUNC(GLOBAL(set_fpscr))
2051 #ifndef NO_FPSCR_VALUES
2053 .comm GLOBAL(fpscr_values),8,4
2055 .comm GLOBAL(fpscr_values),8
2057 #endif /* NO_FPSCR_VALUES */
2058 #endif /* SH2E / SH3E / SH4 */
2059 #endif /* __SH2A_NOFPU__ */
2060 #endif /* L_set_fpscr */
2061 #ifdef L_ic_invalidate
2064 .section .text..SHmedia32,"ax"
2066 .global GLOBAL(init_trampoline)
2067 HIDDEN_FUNC(GLOBAL(init_trampoline))
2068 GLOBAL(init_trampoline):
2070 #ifdef __LITTLE_ENDIAN__
2076 movi 0xffffffffffffd002,r20
2083 ENDFUNC(GLOBAL(init_trampoline))
2084 .global GLOBAL(ic_invalidate)
2085 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2086 GLOBAL(ic_invalidate):
2093 ENDFUNC(GLOBAL(ic_invalidate))
2094 #elif defined(__SH4A__)
2095 .global GLOBAL(ic_invalidate)
2096 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2097 GLOBAL(ic_invalidate):
2102 ENDFUNC(GLOBAL(ic_invalidate))
2103 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2104 /* For system code, we use ic_invalidate_line_i, but user code
2105 needs a different mechanism. A kernel call is generally not
2106 available, and it would also be slow. Different SH4 variants use
2107 different sizes and associativities of the Icache. We use a small
2108 bit of dispatch code that can be put hidden in every shared object,
2109 which calls the actual processor-specific invalidation code in a
2111 Or if you have operating system support, the OS could mmap the
2112 processor-specific code from a single page, since it is highly
2114 .global GLOBAL(ic_invalidate)
2115 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2116 GLOBAL(ic_invalidate):
2132 0: .long GLOBAL(ic_invalidate_array)
2134 .global GLOBAL(ic_invalidate_array)
2135 /* ??? Why won't the assembler allow adding these two constants? */
2136 0: .long _GLOBAL_OFFSET_TABLE_
2137 1: .long GLOBAL(ic_invalidate_array)@GOT
2138 ENDFUNC(GLOBAL(ic_invalidate))
2139 #endif /* __pic__ */
2141 #endif /* L_ic_invalidate */
/* ic_invalidate_array: the symbol whose address the SH4 ic_invalidate
   dispatch code stores in its literal pool.  Assembled only when
   L_ic_invalidate_array is defined; the CPU macros below pick one of two
   variants.  */
2143 #ifdef L_ic_invalidate_array
2144 #if defined(__SH4A__)
2145 /* This is needed when an SH4 dso with trampolines is used on SH4A. */
2146 .global GLOBAL(ic_invalidate_array)
2147 FUNC(GLOBAL(ic_invalidate_array))
2148 GLOBAL(ic_invalidate_array):
2154 ENDFUNC(GLOBAL(ic_invalidate_array))
/* Other SH4 variants; __SH4_NOFPU__ only counts when __SH5__ is not
   defined.  */
2155 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2156 .global GLOBAL(ic_invalidate_array)
2158 FUNC(GLOBAL(ic_invalidate_array))
2159 /* This must be aligned to the beginning of a cache line. */
2160 GLOBAL(ic_invalidate_array):
/* 0x4000 = 16384.  NOTE(review): presumably the size in bytes of one
   instruction-cache way -- confirm the unit.  */
2163 #define WAY_SIZE 0x4000
/* Both .rept blocks repeat their body once per 32 bytes of
   WAY_SIZE * WAYS.  NOTE(review): 32 is presumably the cache-line
   size -- confirm.  */
2166 .rept WAY_SIZE * WAYS / 32
2174 .rept WAY_SIZE * WAYS / 32
2188 #else /* WAYS > 6 */
2189 /* This variant needs two different pages for mmap-ing. */
2207 ENDFUNC(GLOBAL(ic_invalidate_array))
2209 #endif /* L_ic_invalidate_array */
2211 #if defined (__SH5__) && __SH5__ == 32
2212 #ifdef L_shcompact_call_trampoline
/* Dispatch table for GCC_shcompact_call_trampoline.  Each of the 33
   entries is a 16-bit offset of a handler label from ct_main_label.
   Entry order:
     r2 .. r5 : three entries each (_fp, _ld, _pop)
     r6 .. r9 : four entries each  (_fph, _fpl, _ld, _pop)
     tail     : ct_pop_seq (twice), ct_r9_pop, ct_ret_wide, ct_call_func
   The trampoline addresses this table through the biased base
   (ct_main_table - 31 * 2), so the order and the 2-byte entry size are
   part of the contract with that code.  */
2215 LOCAL(ct_main_table):
2216 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2217 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2218 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2219 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2220 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2221 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2222 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2223 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2224 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2225 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2226 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2227 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
/* From r6 on, the single _fp slot is split into _fph / _fpl.  */
2228 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2229 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2230 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2231 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2232 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2233 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2234 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2235 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2236 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2237 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2238 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2239 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2240 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2241 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2242 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2243 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
/* Tail entries: sequence pop, single r9 pop, and the two exits.  */
2244 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2245 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2246 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2247 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2248 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2250 .section .text..SHmedia32, "ax"
2253 /* This function loads 64-bit general-purpose registers from the
2254 stack, from a memory address contained in them or from an FP
2255 register, according to a cookie passed in r1. Its execution
2256 time is linear on the number of registers that actually have
2257 to be copied. See sh.h for details on the actual bit pattern.
2259 The function to be called is passed in r0. If a 32-bit return
2260 value is expected, the actual function will be tail-called,
2261 otherwise the return address will be stored in r10 (that the
2262 caller should expect to be clobbered) and the return value
2263 will be expanded into r2/r3 upon return. */
2265 .global GLOBAL(GCC_shcompact_call_trampoline)
2266 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2267 GLOBAL(GCC_shcompact_call_trampoline):
2268 ptabs/l r0, tr0 /* Prepare to call the actual function. */
/* The callee address now lives in tr0, so r0 is reused as a scratch
   register.  The movi / shori pair builds the biased table address
   (ct_main_table - 31 * 2) in r0: movi supplies bits 31..16, shori
   shifts them up and ORs in bits 15..0.  */
2269 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
/* tr1 = ct_loop, set up between the two halves of the constant load.  */
2270 pt/l LOCAL(ct_loop), tr1
2272 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
/* Reference point for every 16-bit offset stored in ct_main_table.  */
2277 LOCAL(ct_main_label):
/* FP-register handlers of GCC_shcompact_call_trampoline.

   The handlers for r4 .. r9 (_fp for r4/r5, _fpl for r6 .. r9) share one
   three-instruction pattern:
     shlri r1, K - 3, r34      cookie bits K+1..K end up in bits 4..3
     andi  r34, 3 << 3, r33    keep only that 2-bit field (value * 8)
     addi  r33, copy - base, r32
   so r32 = field * 8 + (rN_fp_copy - rN_fp_base).
   K is 23, 20, 16, 12, 8 and 4 for r4, r5, r6, r7, r8 and r9.
   NOTE(review): r32 is presumably a branch offset selecting one of
   several 8-byte copy stubs that start at rN_fp_copy -- confirm against
   the code that consumes r32.  */
2280 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2281 /* It must be dr0, so just do it. */
2287 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2288 /* It is either dr0 or dr2. */
2297 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2298 shlri r1, 23 - 3, r34
2299 andi r34, 3 << 3, r33
2300 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2301 LOCAL(ct_r4_fp_base):
2307 LOCAL(ct_r4_fp_copy):
2314 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2315 shlri r1, 20 - 3, r34
2316 andi r34, 3 << 3, r33
2317 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2318 LOCAL(ct_r5_fp_base):
2324 LOCAL(ct_r5_fp_copy):
2333 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2334 /* It must be dr8. */
2340 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2341 shlri r1, 16 - 3, r34
2342 andi r34, 3 << 3, r33
2343 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2344 LOCAL(ct_r6_fp_base):
2350 LOCAL(ct_r6_fp_copy):
2359 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2360 /* It is either dr8 or dr10. */
2368 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2369 shlri r1, 12 - 3, r34
2370 andi r34, 3 << 3, r33
2371 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2372 LOCAL(ct_r7_fp_base):
2377 LOCAL(ct_r7_fp_copy):
2386 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2387 /* It is either dr8 or dr10. */
/* r32 = cookie bit 8 only.  NOTE(review): presumably the dr8 / dr10
   selector -- confirm.  */
2389 andi r1, 1 << 8, r32
2395 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2396 shlri r1, 8 - 3, r34
2397 andi r34, 3 << 3, r33
2398 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2399 LOCAL(ct_r8_fp_base):
2404 LOCAL(ct_r8_fp_copy):
2413 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2414 /* It is either dr8 or dr10. */
/* r32 = cookie bit 4 only.  NOTE(review): presumably the dr8 / dr10
   selector -- confirm.  */
2416 andi r1, 1 << 4, r32
2422 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2423 shlri r1, 4 - 3, r34
2424 andi r34, 3 << 3, r33
2425 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2426 LOCAL(ct_r9_fp_base):
2431 LOCAL(ct_r9_fp_copy):
/* Memory-load handlers of GCC_shcompact_call_trampoline.  Each handler
   begins by pointing tr2 at a continuation label: ct_rN_load for
   r2 .. r8, but ct_check_tramp for r9 (no ct_r9_load label is referenced
   here).  NOTE(review): r9 is presumably the last register processed,
   which would explain going straight to the final check -- confirm.  */
2440 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2441 pt/l LOCAL(ct_r2_load), tr2
2450 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2451 pt/l LOCAL(ct_r3_load), tr2
2459 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2460 pt/l LOCAL(ct_r4_load), tr2
2468 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2469 pt/l LOCAL(ct_r5_load), tr2
2477 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2478 pt/l LOCAL(ct_r6_load), tr2
2485 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2486 pt/l LOCAL(ct_r7_load), tr2
2493 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2494 pt/l LOCAL(ct_r8_load), tr2
2501 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2502 pt/l LOCAL(ct_check_tramp), tr2
/* Stack-pop handlers of GCC_shcompact_call_trampoline: one label per
   register r2 .. r9, plus ct_pop_seq, which pops a run of registers.  */
2526 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2533 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2540 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2547 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2554 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2561 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2567 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2573 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
/* r30 = cookie & (7 << 1): the 3-bit field in bits 3..1, left in place
   (i.e. the field value times 2).  */
2574 andi r1, 7 << 1, r30
/* r32 = address of ct_end_of_pop_seq, built 16 bits at a time; the entry
   point into the pop sequence is computed relative to that label.  */
2575 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2577 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2581 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2594 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2597 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
/* Exit paths of GCC_shcompact_call_trampoline.  ct_check_tramp shares
   its address with ct_end_of_pop_seq, the label directly before it.  */
2598 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
/* tr2 = ct_ret_wide; the /u suffix marks this branch target as unlikely
   to be used.  */
2599 pt/u LOCAL(ct_ret_wide), tr2
2602 LOCAL(ct_call_func): /* Just branch to the function. */
2604 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2605 64-bit return value. */
/* NOTE(review): endian-specific code; presumably it decides which half
   of the 64-bit result goes to r2 and which to r3 -- confirm.  */
2609 #if __LITTLE_ENDIAN__
2618 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2619 #endif /* L_shcompact_call_trampoline */
2621 #ifdef L_shcompact_return_trampoline
2622 /* This function does the converse of the code in `ret_wide'
2623 above. It is tail-called by SHcompact functions returning
2624 64-bit non-floating-point values, to pack the 32-bit values in
2625 r2 and r3 into r2. */
2628 .section .text..SHmedia32, "ax"
2630 .global GLOBAL(GCC_shcompact_return_trampoline)
/* HIDDEN_FUNC: function symbol type plus hidden visibility in the
   configuration where that macro is non-empty.  */
2631 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2632 GLOBAL(GCC_shcompact_return_trampoline):
/* NOTE(review): endian-specific code; presumably it decides which of
   r2 / r3 supplies the high half of the packed value -- confirm.  */
2634 #if __LITTLE_ENDIAN__
2644 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2645 #endif /* L_shcompact_return_trampoline */
2647 #ifdef L_shcompact_incoming_args
/* Dispatch table for GCC_shcompact_incoming_args.  Each of the 33
   entries is 16 bits wide: either the offset of a handler label from
   ia_main_label, or the constant 1 for a slot that has no handler.
   Entry order:
     r2 .. r5 : invalid, _ld, _push
     r6 .. r9 : invalid, invalid, _ld, _push
     tail     : ia_push_seq (twice), ia_r9_push, ia_return (twice)
   The function addresses this table through the biased base
   (ia_main_table - 31 * 2).  */
2650 LOCAL(ia_main_table):
2651 .word 1 /* Invalid, just loop */
2652 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2653 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2654 .word 1 /* Invalid, just loop */
2655 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2656 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2657 .word 1 /* Invalid, just loop */
2658 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2659 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2660 .word 1 /* Invalid, just loop */
2661 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2662 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
/* From r6 on there are two invalid slots per register instead of one.  */
2663 .word 1 /* Invalid, just loop */
2664 .word 1 /* Invalid, just loop */
2665 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2666 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2667 .word 1 /* Invalid, just loop */
2668 .word 1 /* Invalid, just loop */
2669 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2670 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2671 .word 1 /* Invalid, just loop */
2672 .word 1 /* Invalid, just loop */
2673 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2674 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2675 .word 1 /* Invalid, just loop */
2676 .word 1 /* Invalid, just loop */
2677 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2678 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
/* Tail entries: sequence push, single r9 push, and return.  */
2679 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2680 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2681 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2682 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2683 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2685 .section .text..SHmedia32, "ax"
2688 /* This function stores 64-bit general-purpose registers back in
2689 the stack, and loads the address in which each register
2690 was stored into itself. The lower 32 bits of r17 hold the address
2691 to begin storing, and the upper 32 bits of r17 hold the cookie.
2692 Its execution time is linear on the
2693 number of registers that actually have to be copied, and it is
2694 optimized for structures larger than 64 bits, as opposed to
2695 individual `long long' arguments. See sh.h for details on the
2696 actual bit pattern. */
2698 .global GLOBAL(GCC_shcompact_incoming_args)
2699 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2700 GLOBAL(GCC_shcompact_incoming_args):
2701 ptabs/l r18, tr0 /* Prepare to return. */
2702 shlri r17, 32, r0 /* Load the cookie. */
/* r43 = biased table address (ia_main_table - 31 * 2): movi supplies
   bits 31..16, shori shifts them up and ORs in bits 15..0.  */
2703 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
/* tr1 = ia_loop, set up between the two halves of the constant load.  */
2704 pt/l LOCAL(ia_loop), tr1
2706 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
/* Reference point for every label offset stored in ia_main_table.  */
2711 LOCAL(ia_main_label):
2714 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2723 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2732 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2741 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2750 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2759 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2767 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2775 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2779 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2786 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2793 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2800 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2807 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2814 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2820 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2826 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
/* r38 = cookie & (7 << 1): the 3-bit field in bits 3..1, left in place
   (i.e. the field value times 2).  */
2827 andi r0, 7 << 1, r38
/* r40 = address of ia_end_of_push_seq, built 16 bits at a time; the
   first push instruction is located relative to that label.  */
2828 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2830 shori LOCAL(ia_end_of_push_seq) & 65535, r40
2834 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2847 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2849 LOCAL(ia_return): /* Return. */
2851 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2852 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2853 #endif /* L_shcompact_incoming_args */
/* GCC_nested_trampoline: assembled only when L_nested_trampoline is
   defined.  The symbol is global, and HIDDEN_FUNC additionally gives it
   hidden visibility in the configuration where that macro is non-empty.
   The 8-byte alignment below matters because the code is copied in
   8-byte units.  */
2856 #ifdef L_nested_trampoline
2858 .section .text..SHmedia32,"ax"
2862 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2863 .global GLOBAL(GCC_nested_trampoline)
2864 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2865 GLOBAL(GCC_nested_trampoline):
2882 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2883 #endif /* L_nested_trampoline */
2884 #endif /* __SH5__ */
/* Register-save helper.  With an FPU the entry point is
   GCC_push_shmedia_regs, which first saves FP registers and then runs
   the common code; when __SH4_NOFPU__ is defined only
   GCC_push_shmedia_regs_nofpu exists and it starts at the common code.
   r15 is used as the stack pointer throughout.  */
2886 #ifdef L_push_pop_shmedia_regs
2887 .section .text..SHmedia32,"ax"
2890 #ifndef __SH4_NOFPU__
2891 .global GLOBAL(GCC_push_shmedia_regs)
2892 FUNC(GLOBAL(GCC_push_shmedia_regs))
2893 GLOBAL(GCC_push_shmedia_regs):
/* Reserve 14 eight-byte slots and store the 14 even-numbered double
   registers dr36 .. dr62: dr36 at offset 0, dr62 at offset 13*8.  */
2894 addi.l r15, -14*8, r15
2895 fst.d r15, 13*8, dr62
2896 fst.d r15, 12*8, dr60
2897 fst.d r15, 11*8, dr58
2898 fst.d r15, 10*8, dr56
2899 fst.d r15, 9*8, dr54
2900 fst.d r15, 8*8, dr52
2901 fst.d r15, 7*8, dr50
2902 fst.d r15, 6*8, dr48
2903 fst.d r15, 5*8, dr46
2904 fst.d r15, 4*8, dr44
2905 fst.d r15, 3*8, dr42
2906 fst.d r15, 2*8, dr40
2907 fst.d r15, 1*8, dr38
2908 fst.d r15, 0*8, dr36
2909 #else /* ! __SH4_NOFPU__ */
2910 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2911 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2912 GLOBAL(GCC_push_shmedia_regs_nofpu):
2913 #endif /* ! __SH4_NOFPU__ */
/* Common code: reserve 27 further eight-byte slots below whatever was
   pushed above.  NOTE(review): presumably for non-FP registers --
   confirm which ones.  */
2915 addi.l r15, -27*8, r15
/* Close whichever function symbol was opened above.  */
2947 #ifndef __SH4_NOFPU__
2948 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2950 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
/* Register-restore helper, counterpart of GCC_push_shmedia_regs.  With
   an FPU the entry point is GCC_pop_shmedia_regs; when __SH4_NOFPU__ is
   defined only GCC_pop_shmedia_regs_nofpu exists.  */
2952 #ifndef __SH4_NOFPU__
2953 .global GLOBAL(GCC_pop_shmedia_regs)
2954 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2955 GLOBAL(GCC_pop_shmedia_regs):
/* Reload dr36 .. dr62 from slots 27 .. 40 relative to r15.  These
   offsets mirror the push layout: the 27 slots reserved by the common
   push code sit below the 14 FP slots, so FP slot i is at (27 + i) * 8.  */
2958 fld.d r15, 40*8, dr62
2959 fld.d r15, 39*8, dr60
2960 fld.d r15, 38*8, dr58
2961 fld.d r15, 37*8, dr56
2962 fld.d r15, 36*8, dr54
2963 fld.d r15, 35*8, dr52
2964 fld.d r15, 34*8, dr50
2965 fld.d r15, 33*8, dr48
2966 fld.d r15, 32*8, dr46
2967 fld.d r15, 31*8, dr44
2968 fld.d r15, 30*8, dr42
2969 fld.d r15, 29*8, dr40
2970 fld.d r15, 28*8, dr38
2971 fld.d r15, 27*8, dr36
2973 #else /* ! __SH4_NOFPU__ */
2974 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2975 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2976 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2977 #endif /* ! __SH4_NOFPU__ */
/* Close whichever function symbol was opened above.  */
3014 #ifndef __SH4_NOFPU__
3015 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3017 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3019 #endif /* __SH5__ == 32 */
3020 #endif /* L_push_pop_shmedia_regs */
/* PIC SHmedia copy of sdivsi3.  What the instructions below show:
   tr0 is pointed at a division table (Local_div_table when
   TEXT_DATA_BUG is defined, div_table_internal otherwise); r5 is
   normalised and its top bits form the table index; a byte and a
   halfword fetched from the table seed an inverse that is refined from
   about 11 to about 22 bits; r4 is then multiplied by that inverse.
   The trailing comment on each line gives the fixed-point format of the
   value just produced.
   NOTE(review): r4 = dividend and r5 = divisor is inferred from this
   data flow -- confirm against the callers.  */
3024 #if defined(__pic__) && defined(__SHMEDIA__)
3025 .global GLOBAL(sdivsi3)
3026 FUNC(GLOBAL(sdivsi3))
3028 .section .text..SHmedia32,"ax"
3033 /* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3034 in a text section does not work (at least for shared libraries):
3035 the linker sets the LSB of the address as if this was SHmedia code. */
/* Workaround switch: when defined, the function reads the private
   Local_div_table instead of the exported table.  */
3036 #define TEXT_DATA_BUG
3040 // clobbered: r1,r18,r19,r20,r21,r25,tr0
/* Second .global for the same symbol as above; redundant but harmless.  */
3042 .global GLOBAL(sdivsi3)
3044 #ifdef TEXT_DATA_BUG
3045 ptb datalabel Local_div_table,tr0
3047 ptb GLOBAL(div_table_internal),tr0
3050 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3051 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3054 ldx.ub r20, r21, r19 // u0.8
3055 shari r25, 32, r25 // normalize to s2.30
3057 muls.l r25, r19, r19 // s2.38
3058 ldx.w r20, r21, r21 // s2.14
3060 shari r19, 24, r19 // truncate to s2.14
3061 sub r21, r19, r19 // some 11 bit inverse in s1.14
3062 muls.l r19, r19, r21 // u0.28
3065 muls.l r25, r21, r18 // s2.58
3066 shlli r19, 45, r19 // multiply by two and convert to s2.58
3069 shari r18, 28, r18 // some 22 bit inverse in s1.30
3070 muls.l r18, r25, r0 // s2.60
3071 muls.l r18, r4, r25 // s32.30
3073 shari r0, 16, r19 // s-16.44
3074 muls.l r19, r18, r19 // s-16.74
3076 shari r4, 14, r18 // s19.-14
3077 shari r19, 30, r19 // s-16.44
3078 muls.l r19, r18, r19 // s15.30
3079 xor r21, r0, r21 // You could also use the constant 1 << 27.
3085 ENDFUNC(GLOBAL(sdivsi3))
3086 /* This table has been generated by divtab.c .
3087 Defects for bias -330:
3088 Max defect: 6.081536e-07 at -1.000000e+00
3089 Min defect: 2.849516e-08 at 1.030651e+00
3090 Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3091 Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3092 Defect at 1: 1.238659e-07
3093 Defect at -2: 1.061708e-07 */
3094 #else /* ! __pic__ || ! __SHMEDIA__ */
3096 #endif /* __pic__ */
/* Private copy of the division table, emitted only for PIC SHmedia
   builds with TEXT_DATA_BUG defined; this is the table sdivsi3 points
   tr0 at in that configuration.  It is declared as a 128-byte data
   object in four parts: negative constants, negative factors, positive
   factors, positive constants.  */
3097 #if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
3099 .type Local_div_table,@object
3100 .size Local_div_table,128
3101 /* negative division constants */
3118 /* negative division factors */
3138 /* positive division factors */
3155 /* positive division constants */
3173 #endif /* TEXT_DATA_BUG */
/* Exported division table: declared as a 128-byte data object with the
   same four parts as Local_div_table.  div_table_internal is a hidden
   alias of div_table (HIDDEN_ALIAS).
   NOTE(review): the .global / HIDDEN_ALIAS lines sit between the
   negative and the positive parts, which suggests the div_table symbol
   marks the middle of the data so that negative entries lie at negative
   offsets -- confirm against the label definition.  */
3175 .type GLOBAL(div_table),@object
3176 .size GLOBAL(div_table),128
3177 /* negative division constants */
3194 /* negative division factors */
3212 .global GLOBAL(div_table)
3214 HIDDEN_ALIAS(div_table_internal,div_table)
3216 /* positive division factors */
3233 /* positive division constants */
3250 #endif /* L_div_table */
3251 #endif /* __SH5__ */