1 /* Copyright
(C
) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3 Free Software Foundation
, Inc.
5 This file is free software
; you can redistribute it and/or modify it
6 under the terms of the GNU General
Public License as published by the
7 Free Software Foundation
; either version 3, or (at your option) any
10 This file is distributed
in the hope that it will be useful
, but
11 WITHOUT ANY WARRANTY
; without even the implied warranty of
12 MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General
Public License for more details.
15 Under
Section 7 of GPL version
3, you are granted additional
16 permissions described
in the GCC Runtime Library Exception
, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General
Public License
and
20 a copy of the GCC Runtime Library Exception along with
this program
;
21 see the files COPYING3
and COPYING.RUNTIME respectively. If
not, see
22 <http://www.gnu.
org/licenses
/>.
*/
25 !! libgcc routines for the Renesas
/ SuperH SH CPUs.
26 !! Contributed by Steve Chamberlain.
29 !! ashiftrt_r4_x
, ___ashrsi3
, ___ashlsi3
, ___lshrsi3 routines
30 !! recoded
in assembly by Toshiyasu Morita
33 #if defined
(__ELF__
) && defined
(__linux__
)
34 .
section .note.GNU
-stack
,"",%progbits
38 /* SH2 optimizations for ___ashrsi3
, ___ashlsi3
, ___lshrsi3
and
39 ELF
local label prefixes by J
"orn Rennecke
42 #include "lib1funcs.h
"
44 /* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
45 so it is more convenient to define NO_FPSCR_VALUES here than to
46 define it on the command line. */
47 #if defined __vxworks && defined __PIC__
48 #define NO_FPSCR_VALUES
53 .global GLOBAL(ashiftrt_r4_0)
54 .global GLOBAL(ashiftrt_r4_1)
55 .global GLOBAL(ashiftrt_r4_2)
56 .global GLOBAL(ashiftrt_r4_3)
57 .global GLOBAL(ashiftrt_r4_4)
58 .global GLOBAL(ashiftrt_r4_5)
59 .global GLOBAL(ashiftrt_r4_6)
60 .global GLOBAL(ashiftrt_r4_7)
61 .global GLOBAL(ashiftrt_r4_8)
62 .global GLOBAL(ashiftrt_r4_9)
63 .global GLOBAL(ashiftrt_r4_10)
64 .global GLOBAL(ashiftrt_r4_11)
65 .global GLOBAL(ashiftrt_r4_12)
66 .global GLOBAL(ashiftrt_r4_13)
67 .global GLOBAL(ashiftrt_r4_14)
68 .global GLOBAL(ashiftrt_r4_15)
69 .global GLOBAL(ashiftrt_r4_16)
70 .global GLOBAL(ashiftrt_r4_17)
71 .global GLOBAL(ashiftrt_r4_18)
72 .global GLOBAL(ashiftrt_r4_19)
73 .global GLOBAL(ashiftrt_r4_20)
74 .global GLOBAL(ashiftrt_r4_21)
75 .global GLOBAL(ashiftrt_r4_22)
76 .global GLOBAL(ashiftrt_r4_23)
77 .global GLOBAL(ashiftrt_r4_24)
78 .global GLOBAL(ashiftrt_r4_25)
79 .global GLOBAL(ashiftrt_r4_26)
80 .global GLOBAL(ashiftrt_r4_27)
81 .global GLOBAL(ashiftrt_r4_28)
82 .global GLOBAL(ashiftrt_r4_29)
83 .global GLOBAL(ashiftrt_r4_30)
84 .global GLOBAL(ashiftrt_r4_31)
85 .global GLOBAL(ashiftrt_r4_32)
87 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
88 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
89 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
90 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
91 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
92 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
93 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
94 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
95 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
96 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
97 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
98 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
99 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
100 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
101 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
102 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
103 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
104 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
105 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
106 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
107 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
108 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
109 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
110 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
111 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
112 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
113 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
114 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
115 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
116 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
117 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
118 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
119 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
122 GLOBAL(ashiftrt_r4_32):
123 GLOBAL(ashiftrt_r4_31):
128 GLOBAL(ashiftrt_r4_30):
130 GLOBAL(ashiftrt_r4_29):
132 GLOBAL(ashiftrt_r4_28):
134 GLOBAL(ashiftrt_r4_27):
136 GLOBAL(ashiftrt_r4_26):
138 GLOBAL(ashiftrt_r4_25):
140 GLOBAL(ashiftrt_r4_24):
146 GLOBAL(ashiftrt_r4_23):
148 GLOBAL(ashiftrt_r4_22):
150 GLOBAL(ashiftrt_r4_21):
152 GLOBAL(ashiftrt_r4_20):
154 GLOBAL(ashiftrt_r4_19):
156 GLOBAL(ashiftrt_r4_18):
158 GLOBAL(ashiftrt_r4_17):
160 GLOBAL(ashiftrt_r4_16):
165 GLOBAL(ashiftrt_r4_15):
167 GLOBAL(ashiftrt_r4_14):
169 GLOBAL(ashiftrt_r4_13):
171 GLOBAL(ashiftrt_r4_12):
173 GLOBAL(ashiftrt_r4_11):
175 GLOBAL(ashiftrt_r4_10):
177 GLOBAL(ashiftrt_r4_9):
179 GLOBAL(ashiftrt_r4_8):
181 GLOBAL(ashiftrt_r4_7):
183 GLOBAL(ashiftrt_r4_6):
185 GLOBAL(ashiftrt_r4_5):
187 GLOBAL(ashiftrt_r4_4):
189 GLOBAL(ashiftrt_r4_3):
191 GLOBAL(ashiftrt_r4_2):
193 GLOBAL(ashiftrt_r4_1):
197 GLOBAL(ashiftrt_r4_0):
201 ENDFUNC(GLOBAL(ashiftrt_r4_0))
202 ENDFUNC(GLOBAL(ashiftrt_r4_1))
203 ENDFUNC(GLOBAL(ashiftrt_r4_2))
204 ENDFUNC(GLOBAL(ashiftrt_r4_3))
205 ENDFUNC(GLOBAL(ashiftrt_r4_4))
206 ENDFUNC(GLOBAL(ashiftrt_r4_5))
207 ENDFUNC(GLOBAL(ashiftrt_r4_6))
208 ENDFUNC(GLOBAL(ashiftrt_r4_7))
209 ENDFUNC(GLOBAL(ashiftrt_r4_8))
210 ENDFUNC(GLOBAL(ashiftrt_r4_9))
211 ENDFUNC(GLOBAL(ashiftrt_r4_10))
212 ENDFUNC(GLOBAL(ashiftrt_r4_11))
213 ENDFUNC(GLOBAL(ashiftrt_r4_12))
214 ENDFUNC(GLOBAL(ashiftrt_r4_13))
215 ENDFUNC(GLOBAL(ashiftrt_r4_14))
216 ENDFUNC(GLOBAL(ashiftrt_r4_15))
217 ENDFUNC(GLOBAL(ashiftrt_r4_16))
218 ENDFUNC(GLOBAL(ashiftrt_r4_17))
219 ENDFUNC(GLOBAL(ashiftrt_r4_18))
220 ENDFUNC(GLOBAL(ashiftrt_r4_19))
221 ENDFUNC(GLOBAL(ashiftrt_r4_20))
222 ENDFUNC(GLOBAL(ashiftrt_r4_21))
223 ENDFUNC(GLOBAL(ashiftrt_r4_22))
224 ENDFUNC(GLOBAL(ashiftrt_r4_23))
225 ENDFUNC(GLOBAL(ashiftrt_r4_24))
226 ENDFUNC(GLOBAL(ashiftrt_r4_25))
227 ENDFUNC(GLOBAL(ashiftrt_r4_26))
228 ENDFUNC(GLOBAL(ashiftrt_r4_27))
229 ENDFUNC(GLOBAL(ashiftrt_r4_28))
230 ENDFUNC(GLOBAL(ashiftrt_r4_29))
231 ENDFUNC(GLOBAL(ashiftrt_r4_30))
232 ENDFUNC(GLOBAL(ashiftrt_r4_31))
233 ENDFUNC(GLOBAL(ashiftrt_r4_32))
255 .global GLOBAL(ashrsi3)
256 HIDDEN_FUNC(GLOBAL(ashrsi3))
261 mova LOCAL(ashrsi3_table),r0
272 LOCAL(ashrsi3_table):
273 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
274 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
275 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
276 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
277 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
278 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
279 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
280 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
281 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
282 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
283 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
284 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
285 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
286 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
287 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
288 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
300 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
301 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
302 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
303 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
304 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
384 ENDFUNC(GLOBAL(ashrsi3))
405 .global GLOBAL(ashlsi3)
406 HIDDEN_FUNC(GLOBAL(ashlsi3))
411 mova LOCAL(ashlsi3_table),r0
422 LOCAL(ashlsi3_table):
423 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
424 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
425 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
426 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
427 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
428 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
429 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
430 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
431 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
432 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
433 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
434 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
435 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
436 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
437 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
438 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
439 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
440 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
441 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
442 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
443 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
444 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
445 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
446 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
447 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
448 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
449 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
450 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
451 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
452 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
453 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
454 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
543 ENDFUNC(GLOBAL(ashlsi3))
564 .global GLOBAL(lshrsi3)
565 HIDDEN_FUNC(GLOBAL(lshrsi3))
570 mova LOCAL(lshrsi3_table),r0
581 LOCAL(lshrsi3_table):
582 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
583 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
584 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
585 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
586 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
587 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
588 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
589 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
590 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
591 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
592 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
593 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
594 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
595 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
596 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
597 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
598 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
599 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
600 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
601 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
602 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
603 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
604 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
605 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
606 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
607 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
608 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
609 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
610 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
611 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
612 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
613 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
702 ENDFUNC(GLOBAL(lshrsi3))
708 .global GLOBAL(movmem)
709 HIDDEN_FUNC(GLOBAL(movmem))
710 HIDDEN_ALIAS(movstr,movmem)
711 /* This would be a lot simpler if r6 contained the byte count
712 minus 64, and we wouldn't be called here for a byte count of 64. */
716 bsr GLOBAL(movmemSI52+2)
719 LOCAL(movmem_loop): /* Reached with rts */
725 bt LOCAL(movmem_done)
732 bt GLOBAL(movmemSI52)
733 ! done all the large groups, do the remainder
735 mova GLOBAL(movmemSI4)+4,r0
738 LOCAL(movmem_done): ! share slot insn, works out aligned.
745 ! ??? We need aliases movstr* for movmem* for the older libraries. These
746 ! aliases will be removed at some point in the future.
747 .global GLOBAL(movmemSI64)
748 HIDDEN_FUNC(GLOBAL(movmemSI64))
749 HIDDEN_ALIAS(movstrSI64,movmemSI64)
753 .global GLOBAL(movmemSI60)
754 HIDDEN_FUNC(GLOBAL(movmemSI60))
755 HIDDEN_ALIAS(movstrSI60,movmemSI60)
759 .global GLOBAL(movmemSI56)
760 HIDDEN_FUNC(GLOBAL(movmemSI56))
761 HIDDEN_ALIAS(movstrSI56,movmemSI56)
765 .global GLOBAL(movmemSI52)
766 HIDDEN_FUNC(GLOBAL(movmemSI52))
767 HIDDEN_ALIAS(movstrSI52,movmemSI52)
771 .global GLOBAL(movmemSI48)
772 HIDDEN_FUNC(GLOBAL(movmemSI48))
773 HIDDEN_ALIAS(movstrSI48,movmemSI48)
777 .global GLOBAL(movmemSI44)
778 HIDDEN_FUNC(GLOBAL(movmemSI44))
779 HIDDEN_ALIAS(movstrSI44,movmemSI44)
783 .global GLOBAL(movmemSI40)
784 HIDDEN_FUNC(GLOBAL(movmemSI40))
785 HIDDEN_ALIAS(movstrSI40,movmemSI40)
789 .global GLOBAL(movmemSI36)
790 HIDDEN_FUNC(GLOBAL(movmemSI36))
791 HIDDEN_ALIAS(movstrSI36,movmemSI36)
795 .global GLOBAL(movmemSI32)
796 HIDDEN_FUNC(GLOBAL(movmemSI32))
797 HIDDEN_ALIAS(movstrSI32,movmemSI32)
801 .global GLOBAL(movmemSI28)
802 HIDDEN_FUNC(GLOBAL(movmemSI28))
803 HIDDEN_ALIAS(movstrSI28,movmemSI28)
807 .global GLOBAL(movmemSI24)
808 HIDDEN_FUNC(GLOBAL(movmemSI24))
809 HIDDEN_ALIAS(movstrSI24,movmemSI24)
813 .global GLOBAL(movmemSI20)
814 HIDDEN_FUNC(GLOBAL(movmemSI20))
815 HIDDEN_ALIAS(movstrSI20,movmemSI20)
819 .global GLOBAL(movmemSI16)
820 HIDDEN_FUNC(GLOBAL(movmemSI16))
821 HIDDEN_ALIAS(movstrSI16,movmemSI16)
825 .global GLOBAL(movmemSI12)
826 HIDDEN_FUNC(GLOBAL(movmemSI12))
827 HIDDEN_ALIAS(movstrSI12,movmemSI12)
831 .global GLOBAL(movmemSI8)
832 HIDDEN_FUNC(GLOBAL(movmemSI8))
833 HIDDEN_ALIAS(movstrSI8,movmemSI8)
837 .global GLOBAL(movmemSI4)
838 HIDDEN_FUNC(GLOBAL(movmemSI4))
839 HIDDEN_ALIAS(movstrSI4,movmemSI4)
845 ENDFUNC(GLOBAL(movmemSI64))
846 ENDFUNC(GLOBAL(movmemSI60))
847 ENDFUNC(GLOBAL(movmemSI56))
848 ENDFUNC(GLOBAL(movmemSI52))
849 ENDFUNC(GLOBAL(movmemSI48))
850 ENDFUNC(GLOBAL(movmemSI44))
851 ENDFUNC(GLOBAL(movmemSI40))
852 ENDFUNC(GLOBAL(movmemSI36))
853 ENDFUNC(GLOBAL(movmemSI32))
854 ENDFUNC(GLOBAL(movmemSI28))
855 ENDFUNC(GLOBAL(movmemSI24))
856 ENDFUNC(GLOBAL(movmemSI20))
857 ENDFUNC(GLOBAL(movmemSI16))
858 ENDFUNC(GLOBAL(movmemSI12))
859 ENDFUNC(GLOBAL(movmemSI8))
860 ENDFUNC(GLOBAL(movmemSI4))
861 ENDFUNC(GLOBAL(movmem))
866 .global GLOBAL(movmem_i4_even)
867 .global GLOBAL(movmem_i4_odd)
868 .global GLOBAL(movmemSI12_i4)
870 HIDDEN_FUNC(GLOBAL(movmem_i4_even))
871 HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
872 HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
874 HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
875 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
876 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
886 GLOBAL(movmem_i4_even):
888 bra L_movmem_start_even
891 GLOBAL(movmem_i4_odd):
903 bt/s L_movmem_2mod4_end
917 ENDFUNC(GLOBAL(movmem_i4_even))
918 ENDFUNC(GLOBAL(movmem_i4_odd))
921 GLOBAL(movmemSI12_i4):
930 ENDFUNC(GLOBAL(movmemSI12_i4))
936 .global GLOBAL(mulsi3)
937 HIDDEN_FUNC(GLOBAL(mulsi3))
941 ! r0 = aabb*ccdd via partial products
943 ! if aa == 0 and cc == 0
947 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
951 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
952 mov r5,r3 ! r3 = ccdd
953 swap.w r4,r2 ! r2 = bbaa
954 xtrct r2,r3 ! r3 = aacc
955 tst r3,r3 ! msws zero ?
957 rts ! yes - then we have the answer
960 hiset: sts macl,r0 ! r0 = bb*dd
961 mulu.w r2,r5 ! brewing macl = aa*dd
963 mulu.w r3,r4 ! brewing macl = cc*bb
970 ENDFUNC(GLOBAL(mulsi3))
972 #endif /* ! __SH5__ */
975 !! 4 byte integer Divide code for the Renesas SH
977 !! args in r4 and r5, result in fpul, clobber dr0, dr2
979 .global GLOBAL(sdivsi3_i4)
980 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
990 ENDFUNC(GLOBAL(sdivsi3_i4))
991 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
992 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
994 #if ! __SH5__ || __SH5__ == 32
998 .global GLOBAL(sdivsi3_i4)
999 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1014 ENDFUNC(GLOBAL(sdivsi3_i4))
1015 #endif /* ! __SH5__ || __SH5__ == 32 */
1016 #endif /* ! __SH4__ */
1020 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1022 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1024 !! Steve Chamberlain
1029 !! args in r4 and r5, result in r0, clobber r1, r2, r3, and t bit
1031 .global GLOBAL(sdivsi3)
1034 .section .text..SHmedia32,"ax"
1040 /* The assembly code that follows is a hand-optimized version of the C
1041 code that follows. Note that the registers that are modified are
1042 exactly those listed as clobbered in the patterns divsi3_i1 and
1045 int __sdivsi3 (i, j)
1048 register unsigned long long r18 asm ("r18
");
1049 register unsigned long long r19 asm ("r19
");
1050 register unsigned long long r0 asm ("r0
") = 0;
1051 register unsigned long long r1 asm ("r1
") = 1;
1052 register int r2 asm ("r2
") = i >> 31;
1053 register int r3 asm ("r3
") = j >> 31;
1065 r0 |= r1, r18 -= r19;
1066 while (r19 >>= 1, r1 >>= 1);
1068 return r2 * (int)r0;
1072 pt/l LOCAL(sdivsi3_dontadd), tr2
1073 pt/l LOCAL(sdivsi3_loop), tr1
1086 LOCAL(sdivsi3_loop):
1090 LOCAL(sdivsi3_dontadd):
1099 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1102 // can create absolute value without extra latency,
1103 // but dependent on proper sign extension of inputs:
1106 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1109 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1110 movi 0xffffffffffffbb0c,r19 // shift count eqiv 76
1118 // If r4 was to be used in-place instead of r21, could use this sequence
1119 // to compute absolute:
1120 // sub r63,r4,r19 // compute absolute value of r4
1121 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1122 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1134 mmacnfx.wl r25,r2,r1
1159 #else /* ! 0 && ! 0 */
1162 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1164 HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1166 FUNC(GLOBAL(sdivsi3))
1167 GLOBAL(sdivsi3): /* this is the shcompact entry point */
1168 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1169 // with the SHcompact implementation, which clobbers tr1 / tr2.
1170 .global GLOBAL(sdivsi3_1)
1172 .global GLOBAL(div_table_internal)
1173 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1174 shori GLOBAL(div_table_internal) & 65535, r20
1176 .global GLOBAL(sdivsi3_2)
1178 // clobbered: r1,r18,r19,r21,r25,tr0
1181 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
1182 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
1183 ldx.ub r20, r21, r19 // u0.8
1184 shari r25, 32, r25 // normalize to s2.30
1186 muls.l r25, r19, r19 // s2.38
1187 ldx.w r20, r21, r21 // s2.14
1189 shari r19, 24, r19 // truncate to s2.14
1190 sub r21, r19, r19 // some 11 bit inverse in s1.14
1191 muls.l r19, r19, r21 // u0.28
1194 muls.l r25, r21, r18 // s2.58
1195 shlli r19, 45, r19 // multiply by two and convert to s2.58
1198 shari r18, 28, r18 // some 22 bit inverse in s1.30
1199 muls.l r18, r25, r0 // s2.60
1200 muls.l r18, r4, r25 // s32.30
1202 shari r0, 16, r19 // s-16.44
1203 muls.l r19, r18, r19 // s-16.74
1205 shari r4, 14, r18 // s19.-14
1206 shari r19, 30, r19 // s-16.44
1207 muls.l r19, r18, r19 // s15.30
1208 xor r21, r0, r21 // You could also use the constant 1 << 27.
1215 ENDFUNC(GLOBAL(sdivsi3))
1217 ENDFUNC(GLOBAL(sdivsi3_2))
1219 #elif defined __SHMEDIA__
1220 /* m5compact-nofpu */
1221 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1223 .section .text..SHmedia32,"ax"
1225 FUNC(GLOBAL(sdivsi3))
1227 pt/l LOCAL(sdivsi3_dontsub), tr0
1228 pt/l LOCAL(sdivsi3_loop), tr1
1240 LOCAL(sdivsi3_loop):
1244 LOCAL(sdivsi3_dontsub):
1250 ENDFUNC(GLOBAL(sdivsi3))
1251 #else /* ! __SHMEDIA__ */
1252 FUNC(GLOBAL(sdivsi3))
1337 ENDFUNC(GLOBAL(sdivsi3))
1338 #endif /* ! __SHMEDIA__ */
1339 #endif /* ! __SH4__ */
1344 !! 4 byte integer Divide code for the Renesas SH
1346 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1349 .global GLOBAL(udivsi3_i4)
1350 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1381 .align 3 ! make double below 8 byte aligned.
1386 ENDFUNC(GLOBAL(udivsi3_i4))
1387 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1388 #if ! __SH5__ || __SH5__ == 32
1389 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1391 .global GLOBAL(udivsi3_i4)
1392 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1406 ENDFUNC(GLOBAL(udivsi3_i4))
1407 #endif /* ! __SH5__ || __SH5__ == 32 */
1408 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1409 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1411 .global GLOBAL(udivsi3_i4)
1412 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1441 .align 3 ! make double below 8 byte aligned.
1456 ENDFUNC(GLOBAL(udivsi3_i4))
1457 #endif /* ! __SH4__ */
1461 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1463 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1465 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1466 .global GLOBAL(udivsi3)
1467 HIDDEN_FUNC(GLOBAL(udivsi3))
1471 .section .text..SHmedia32,"ax"
1477 /* The assembly code that follows is a hand-optimized version of the C
1478 code that follows. Note that the registers that are modified are
1479 exactly those listed as clobbered in the patterns udivsi3_i1 and
1486 register unsigned long long r0 asm ("r0
") = 0;
1487 register unsigned long long r18 asm ("r18
") = 1;
1488 register unsigned long long r4 asm ("r4
") = i;
1489 register unsigned long long r19 asm ("r19
") = j;
1495 r0 |= r18, r4 -= r19;
1496 while (r19 >>= 1, r18 >>= 1);
1502 pt/l LOCAL(udivsi3_dontadd), tr2
1503 pt/l LOCAL(udivsi3_loop), tr1
1511 LOCAL(udivsi3_loop):
1515 LOCAL(udivsi3_dontadd):
1523 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1529 movi 0xffffffffffffbb0c,r20 // shift count eqiv 76
1531 mmulfx.w r21,r21,r19
1532 mshflo.w r21,r63,r21
1534 mmulfx.w r25,r19,r19
1538 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1539 before the msub.w, but we need a different value for
1540 r19 to keep errors under control. */
1542 mmulfx.w r19,r19,r19
1546 mmacnfx.wl r25,r19,r21
1571 #elif defined (__SHMEDIA__)
1572 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1573 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1574 So use a short shmedia loop. */
1575 // clobbered: r20,r21,r25,tr0,tr1,tr2
1577 .section .text..SHmedia32,"ax"
1580 pt/l LOCAL(udivsi3_dontsub), tr0
1581 pt/l LOCAL(udivsi3_loop), tr1
1586 LOCAL(udivsi3_loop):
1590 LOCAL(udivsi3_dontsub):
1595 #else /* ! defined (__SHMEDIA__) */
1599 div1 r5,r4; div1 r5,r4; div1 r5,r4
1600 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1603 div1 r5,r4; rotcl r0
1604 div1 r5,r4; rotcl r0
1605 div1 r5,r4; rotcl r0
1613 bf LOCAL(large_divisor)
1615 bf/s LOCAL(large_divisor)
1637 LOCAL(large_divisor):
1656 ENDFUNC(GLOBAL(udivsi3))
1657 #endif /* ! __SHMEDIA__ */
1658 #endif /* __SH4__ */
1659 #endif /* L_udivsi3 */
1664 .section .text..SHmedia32,"ax"
1666 .global GLOBAL(udivdi3)
1667 FUNC(GLOBAL(udivdi3))
1669 HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1674 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1678 sub r63,r22,r20 // r63 == 64 % 64
1680 pta LOCAL(large_divisor),tr0
1686 bgt/u r9,r63,tr0 // large_divisor
1695 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1696 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1697 the case may be, %0000000000000000 000.11111111111, still */
1698 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1703 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1705 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1714 mcmpgt.l r21,r63,r21 // See Note 1
1716 mshfhi.l r63,r21,r21
1720 /* small divisor: need a third divide step */
1730 /* could test r3 here to check for divide by zero. */
1733 LOCAL(large_divisor):
1742 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1743 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1744 the case may be, %0000000000000000 000.11111111111, still */
1745 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1750 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1752 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1756 pta LOCAL(no_lo_adj),tr0
1763 bgtu/u r7,r25,tr0 // no_lo_adj
1769 /* large_divisor: only needs a few adjustments. */
1776 ENDFUNC(GLOBAL(udivdi3))
1777 /* Note 1: To shift the result of the second divide stage so that the result
1778 always fits into 32 bits, yet we still reduce the rest sufficiently
1779 would require a lot of instructions to do the shifts just right. Using
1780 the full 64 bit shift result to multiply with the divisor would require
1781 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1782 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1783 know that the rest after taking this partial result into account will
1784 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1785 upper 32 bits of the partial result are nonzero. */
1786 #endif /* __SHMEDIA__ */
1787 #endif /* L_udivdi3 */
1792 .section .text..SHmedia32,"ax"
1794 .global GLOBAL(divdi3)
1795 FUNC(GLOBAL(divdi3))
1797 pta GLOBAL(udivdi3_internal),tr0
1809 ENDFUNC(GLOBAL(divdi3))
1810 #endif /* __SHMEDIA__ */
1811 #endif /* L_divdi3 */
1816 .section .text..SHmedia32,"ax"
1818 .global GLOBAL(umoddi3)
1819 FUNC(GLOBAL(umoddi3))
1821 HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1826 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1830 sub r63,r22,r20 // r63 == 64 % 64
1832 pta LOCAL(large_divisor),tr0
1838 bgt/u r9,r63,tr0 // large_divisor
1847 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1848 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1849 the case may be, %0000000000000000 000.11111111111, still */
1850 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1855 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1857 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1862 /* bubble */ /* could test r3 here to check for divide by zero. */
1865 mcmpgt.l r21,r63,r21 // See Note 1
1867 mshfhi.l r63,r21,r21
1871 /* small divisor: need a third divide step */
1874 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1884 LOCAL(large_divisor):
1893 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1894 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1895 the case may be, %0000000000000000 000.11111111111, still */
1896 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1901 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1903 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1907 pta LOCAL(no_lo_adj),tr0
1914 bgtu/u r7,r25,tr0 // no_lo_adj
1920 /* large_divisor: only needs a few adjustments. */
1929 ENDFUNC(GLOBAL(umoddi3))
1930 /* Note 1: To shift the result of the second divide stage so that the result
1931 always fits into 32 bits, yet we still reduce the rest sufficiently
1932 would require a lot of instructions to do the shifts just right. Using
1933 the full 64 bit shift result to multiply with the divisor would require
1934 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1935 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1936 know that the rest after taking this partial result into account will
1937 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1938 upper 32 bits of the partial result are nonzero. */
1939 #endif /* __SHMEDIA__ */
1940 #endif /* L_umoddi3 */
1945 .section .text..SHmedia32,"ax"
1947 .global GLOBAL(moddi3)
1948 FUNC(GLOBAL(moddi3))
1950 pta GLOBAL(umoddi3_internal),tr0
1962 ENDFUNC(GLOBAL(moddi3))
1963 #endif /* __SHMEDIA__ */
1964 #endif /* L_moddi3 */
1967 #if !defined (__SH2A_NOFPU__)
1968 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1972 .global GLOBAL(set_fpscr)
1973 HIDDEN_FUNC(GLOBAL(set_fpscr))
1979 mov.l LOCAL(set_fpscr_L0_base),r12
1980 mov.l LOCAL(set_fpscr_L0_index),r0
1984 mova LOCAL(set_fpscr_L0),r0
1985 mov.l LOCAL(set_fpscr_L0),r12
1988 mov.l LOCAL(set_fpscr_L1),r0
1992 mov.l LOCAL(set_fpscr_L1),r1
1999 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2002 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2011 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2015 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2023 LOCAL(set_fpscr_L0_base):
2024 .long ___GOTT_BASE__
2025 LOCAL(set_fpscr_L0_index):
2026 .long ___GOTT_INDEX__
2028 LOCAL(set_fpscr_L0):
2029 .long _GLOBAL_OFFSET_TABLE_
2031 LOCAL(set_fpscr_L1):
2032 .long GLOBAL(fpscr_values@GOT)
2034 LOCAL(set_fpscr_L1):
2035 .long GLOBAL(fpscr_values)
2038 ENDFUNC(GLOBAL(set_fpscr))
2039 #ifndef NO_FPSCR_VALUES
2041 .comm GLOBAL(fpscr_values),8,4
2043 .comm GLOBAL(fpscr_values),8
2045 #endif /* NO_FPSCR_VALUES */
2046 #endif /* SH2E / SH3E / SH4 */
2047 #endif /* __SH2A_NOFPU__ */
2048 #endif /* L_set_fpscr */
2049 #ifdef L_ic_invalidate
2052 .section .text..SHmedia32,"ax"
2054 .global GLOBAL(init_trampoline)
2055 HIDDEN_FUNC(GLOBAL(init_trampoline))
2056 GLOBAL(init_trampoline):
2058 #ifdef __LITTLE_ENDIAN__
2064 movi 0xffffffffffffd002,r20
2071 ENDFUNC(GLOBAL(init_trampoline))
2072 .global GLOBAL(ic_invalidate)
2073 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2074 GLOBAL(ic_invalidate):
2081 ENDFUNC(GLOBAL(ic_invalidate))
2082 #elif defined(__SH4A__)
2083 .global GLOBAL(ic_invalidate)
2084 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2085 GLOBAL(ic_invalidate):
2091 ENDFUNC(GLOBAL(ic_invalidate))
2092 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2093 /* For system code, we use ic_invalidate_line_i, but user code
2094 needs a different mechanism. A kernel call is generally not
2095 available, and it would also be slow. Different SH4 variants use
2096 different sizes and associativities of the Icache. We use a small
2097 bit of dispatch code that can be put hidden in every shared object,
2098 which calls the actual processor-specific invalidation code in a
2100 Or if you have operating system support, the OS could mmap the
2101 processor-specific code from a single page, since it is highly
2103 .global GLOBAL(ic_invalidate)
2104 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2105 GLOBAL(ic_invalidate):
2132 0: .long GLOBAL(ic_invalidate_array)
2134 .global GLOBAL(ic_invalidate_array)
2135 0: .long GLOBAL(ic_invalidate_array)@GOT
2137 1: .long ___GOTT_BASE__
2138 2: .long ___GOTT_INDEX__
2140 1: .long _GLOBAL_OFFSET_TABLE_
2142 ENDFUNC(GLOBAL(ic_invalidate))
2143 #endif /* __pic__ */
2145 #endif /* L_ic_invalidate */
2147 #ifdef L_ic_invalidate_array
2148 #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
2149 .global GLOBAL(ic_invalidate_array)
2150 /* This is needed when an SH4 dso with trampolines is used on SH4A. */
2151 .global GLOBAL(ic_invalidate_array)
2152 FUNC(GLOBAL(ic_invalidate_array))
2153 GLOBAL(ic_invalidate_array):
2161 ENDFUNC(GLOBAL(ic_invalidate_array))
2162 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2163 .global GLOBAL(ic_invalidate_array)
2165 FUNC(GLOBAL(ic_invalidate_array))
2166 /* This must be aligned to the beginning of a cache line. */
2167 GLOBAL(ic_invalidate_array):
2170 #define WAY_SIZE 0x4000
2173 .rept WAY_SIZE * WAYS / 32
2181 .rept WAY_SIZE * WAYS / 32
2195 #else /* WAYS > 6 */
2196 /* This variant needs two different pages for mmap-ing. */
2214 ENDFUNC(GLOBAL(ic_invalidate_array))
2216 #endif /* L_ic_invalidate_array */
2218 #if defined (__SH5__) && __SH5__ == 32
2219 #ifdef L_shcompact_call_trampoline
/* Dispatch table for GCC_shcompact_call_trampoline.  Each entry is a
   16-bit offset of a handler label measured from ct_main_label.
   Layout (33 entries):
     r2 .. r5: three entries each  -- fp, ld, pop
     r6 .. r9: four entries each   -- fph, fpl, ld, pop
     tail:     pop_seq, pop_seq, r9_pop, ret_wide, call_func  */
2222 LOCAL(ct_main_table):
2223 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2224 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2225 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2226 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2227 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2228 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2229 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2230 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2231 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2232 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2233 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2234 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2235 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2236 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2237 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2238 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2239 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2240 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2241 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2242 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2243 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2244 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2245 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2246 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2247 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2248 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2249 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2250 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2251 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2252 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2253 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2254 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2255 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2257 .section .text..SHmedia32, "ax"
2260 /* This function loads 64-bit general-purpose registers from the
2261 stack, from a memory address contained in them or from an FP
2262 register, according to a cookie passed in r1. Its execution
2263 time is linear in the number of registers that actually have
2264 to be copied. See sh.h for details on the actual bit pattern.
2266 The function to be called is passed in r0. If a 32-bit return
2267 value is expected, the actual function will be tail-called,
2268 otherwise the return address will be stored in r10 (that the
2269 caller should expect to be clobbered) and the return value
2270 will be expanded into r2/r3 upon return. */
/* Entry: r0 = address of the function to call, r1 = register cookie.
   The handlers below are reached through ct_main_table, whose entries
   are offsets from ct_main_label.  */
2272 .global GLOBAL(GCC_shcompact_call_trampoline)
2273 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2274 GLOBAL(GCC_shcompact_call_trampoline):
2275 ptabs/l r0, tr0 /* Prepare to call the actual function. */
/* With the callee now held in tr0, r0 is reused: the movi/shori pair
   builds the address (ct_main_table - 31 * 2) in it, upper 16 bits first
   and then the lower 16 bits.  tr1 is set up to target ct_loop.  */
2276 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2277 pt/l LOCAL(ct_loop), tr1
2279 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2284 LOCAL(ct_main_label):
2287 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2288 /* It must be dr0, so just do it. */
2294 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2295 /* It is either dr0 or dr2. */
2304 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
/* The 2-bit field for r4 sits in cookie bits 23-24: shifting right by
   23 - 3 moves it to bits 3-4, and the mask 3 << 3 leaves it scaled by 8
   (0, 8, 16 or 24).  That value plus the distance from ct_r4_fp_base to
   ct_r4_fp_copy is formed in r32.  ct_r5_fp and the ct_rN_fpl handlers
   below repeat the same three instructions with their own shift counts.  */
2305 shlri r1, 23 - 3, r34
2306 andi r34, 3 << 3, r33
2307 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2308 LOCAL(ct_r4_fp_base):
2314 LOCAL(ct_r4_fp_copy):
2321 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2322 shlri r1, 20 - 3, r34
2323 andi r34, 3 << 3, r33
2324 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2325 LOCAL(ct_r5_fp_base):
2331 LOCAL(ct_r5_fp_copy):
2340 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2341 /* It must be dr8. */
2347 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2348 shlri r1, 16 - 3, r34
2349 andi r34, 3 << 3, r33
2350 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2351 LOCAL(ct_r6_fp_base):
2357 LOCAL(ct_r6_fp_copy):
2366 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2367 /* It is either dr8 or dr10. */
2375 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2376 shlri r1, 12 - 3, r34
2377 andi r34, 3 << 3, r33
2378 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2379 LOCAL(ct_r7_fp_base):
2384 LOCAL(ct_r7_fp_copy):
2393 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2394 /* It is either dr8 or dr10. */
/* Isolate cookie bit 8 in r32.  */
2396 andi r1, 1 << 8, r32
2402 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2403 shlri r1, 8 - 3, r34
2404 andi r34, 3 << 3, r33
2405 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2406 LOCAL(ct_r8_fp_base):
2411 LOCAL(ct_r8_fp_copy):
2420 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2421 /* It is either dr8 or dr10. */
/* Isolate cookie bit 4 in r32.  */
2423 andi r1, 1 << 4, r32
2429 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2430 shlri r1, 4 - 3, r34
2431 andi r34, 3 << 3, r33
2432 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2433 LOCAL(ct_r9_fp_base):
2438 LOCAL(ct_r9_fp_copy):
/* Each ct_rN_ld handler first points tr2 at its own ct_rN_load label.  */
2447 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2448 pt/l LOCAL(ct_r2_load), tr2
2457 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2458 pt/l LOCAL(ct_r3_load), tr2
2466 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2467 pt/l LOCAL(ct_r4_load), tr2
2475 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2476 pt/l LOCAL(ct_r5_load), tr2
2484 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2485 pt/l LOCAL(ct_r6_load), tr2
2492 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2493 pt/l LOCAL(ct_r7_load), tr2
2500 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2501 pt/l LOCAL(ct_r8_load), tr2
2508 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
/* Unlike r2 .. r8, the r9 handler points tr2 at ct_check_tramp.  */
2509 pt/l LOCAL(ct_check_tramp), tr2
2533 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2540 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2547 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2554 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2561 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2568 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2574 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2580 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
/* r30 = cookie bits 1-3, left in place (an even value 0 .. 14).  The
   movi/shori pair builds the address of ct_end_of_pop_seq in r32.  */
2581 andi r1, 7 << 1, r30
2582 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2584 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2588 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2601 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
/* ct_end_of_pop_seq and ct_check_tramp label the same location.  */
2604 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2605 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2606 pt/u LOCAL(ct_ret_wide), tr2
2609 LOCAL(ct_call_func): /* Just branch to the function. */
2611 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2612 64-bit return value. */
/* The unpacking code is selected by byte order.  */
2616 #if __LITTLE_ENDIAN__
2625 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2626 #endif /* L_shcompact_call_trampoline */
2628 #ifdef L_shcompact_return_trampoline
2629 /* This function does the converse of the code in `ret_wide'
2630 above. It is tail-called by SHcompact functions returning
2631 64-bit non-floating-point values, to pack the 32-bit values in
2632 r2 and r3 into r2. */
2635 .section .text..SHmedia32, "ax"
/* Entry point of the return trampoline.  The symbol is exported and
   declared through HIDDEN_FUNC; the packing code is selected by byte
   order.  */
2637 .global GLOBAL(GCC_shcompact_return_trampoline)
2638 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2639 GLOBAL(GCC_shcompact_return_trampoline):
2641 #if __LITTLE_ENDIAN__
2651 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2652 #endif /* L_shcompact_return_trampoline */
2654 #ifdef L_shcompact_incoming_args
/* Dispatch table for GCC_shcompact_incoming_args.  Each valid entry is a
   16-bit offset of a handler label measured from ia_main_label; slots
   that hold the constant 1 are marked as invalid.
   Layout (33 entries):
     r2 .. r5: three entries each  -- invalid, ld, push
     r6 .. r9: four entries each   -- invalid, invalid, ld, push
     tail:     push_seq, push_seq, r9_push, return, return  */
2657 LOCAL(ia_main_table):
2658 .word 1 /* Invalid, just loop */
2659 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2660 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2661 .word 1 /* Invalid, just loop */
2662 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2663 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2664 .word 1 /* Invalid, just loop */
2665 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2666 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2667 .word 1 /* Invalid, just loop */
2668 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2669 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2670 .word 1 /* Invalid, just loop */
2671 .word 1 /* Invalid, just loop */
2672 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2673 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2674 .word 1 /* Invalid, just loop */
2675 .word 1 /* Invalid, just loop */
2676 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2677 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2678 .word 1 /* Invalid, just loop */
2679 .word 1 /* Invalid, just loop */
2680 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2681 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2682 .word 1 /* Invalid, just loop */
2683 .word 1 /* Invalid, just loop */
2684 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2685 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2686 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2687 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2688 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2689 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2690 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2692 .section .text..SHmedia32, "ax"
2695 /* This function stores 64-bit general-purpose registers back in
2696 the stack, and loads the address in which each register
2697 was stored into itself. The lower 32 bits of r17 hold the address
2698 to begin storing, and the upper 32 bits of r17 hold the cookie.
2699 Its execution time is linear in the
2700 number of registers that actually have to be copied, and it is
2701 optimized for structures larger than 64 bits, as opposed to
2702 individual `long long' arguments. See sh.h for details on the
2703 actual bit pattern. */
/* Entry: r17 holds the cookie in its upper 32 bits and r18 the return
   address.  The handlers below are reached through ia_main_table, whose
   entries are offsets from ia_main_label.  */
2705 .global GLOBAL(GCC_shcompact_incoming_args)
2706 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2707 GLOBAL(GCC_shcompact_incoming_args):
2708 ptabs/l r18, tr0 /* Prepare to return. */
2709 shlri r17, 32, r0 /* Load the cookie. */
/* The movi/shori pair builds the address (ia_main_table - 31 * 2) in r43,
   upper 16 bits first and then the lower 16 bits.  tr1 is set up to
   target ia_loop.  */
2710 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2711 pt/l LOCAL(ia_loop), tr1
2713 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
2718 LOCAL(ia_main_label):
2721 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2730 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2739 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2748 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2757 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2766 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2774 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2782 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2786 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2793 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2800 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2807 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2814 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2821 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2827 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2833 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
/* r38 = cookie bits 1-3, left in place (an even value 0 .. 14).  The
   movi/shori pair builds the address of ia_end_of_push_seq in r40.  */
2834 andi r0, 7 << 1, r38
2835 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2837 shori LOCAL(ia_end_of_push_seq) & 65535, r40
2841 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2854 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2856 LOCAL(ia_return): /* Return. */
2858 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2859 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2860 #endif /* L_shcompact_incoming_args */
2863 #ifdef L_nested_trampoline
2865 .section .text..SHmedia32,"ax"
/* GCC_nested_trampoline: code that is copied elsewhere rather than only
   executed in place, hence the 8-byte alignment requested below.  The
   symbol is exported and declared through HIDDEN_FUNC.  */
2869 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2870 .global GLOBAL(GCC_nested_trampoline)
2871 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2872 GLOBAL(GCC_nested_trampoline):
2889 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2890 #endif /* L_nested_trampoline */
2891 #endif /* __SH5__ */
2893 #ifdef L_push_pop_shmedia_regs
2894 .section .text..SHmedia32,"ax"
2897 #ifndef __SH4_NOFPU__
/* FPU build (__SH4_NOFPU__ undefined): GCC_push_shmedia_regs first lowers
   r15 (presumably the stack pointer) by 14 * 8 bytes and stores the 14
   even-numbered double registers dr36 .. dr62 at offsets 0*8 .. 13*8
   from the new r15.  */
2898 .global GLOBAL(GCC_push_shmedia_regs)
2899 FUNC(GLOBAL(GCC_push_shmedia_regs))
2900 GLOBAL(GCC_push_shmedia_regs):
2901 addi.l r15, -14*8, r15
2902 fst.d r15, 13*8, dr62
2903 fst.d r15, 12*8, dr60
2904 fst.d r15, 11*8, dr58
2905 fst.d r15, 10*8, dr56
2906 fst.d r15, 9*8, dr54
2907 fst.d r15, 8*8, dr52
2908 fst.d r15, 7*8, dr50
2909 fst.d r15, 6*8, dr48
2910 fst.d r15, 5*8, dr46
2911 fst.d r15, 4*8, dr44
2912 fst.d r15, 3*8, dr42
2913 fst.d r15, 2*8, dr40
2914 fst.d r15, 1*8, dr38
2915 fst.d r15, 0*8, dr36
2916 #else /* ! __SH4_NOFPU__ */
2917 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2918 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2919 GLOBAL(GCC_push_shmedia_regs_nofpu):
2920 #endif /* ! __SH4_NOFPU__ */
/* Shared by both entry points: lower r15 by a further 27 * 8 bytes.  */
2922 addi.l r15, -27*8, r15
2954 #ifndef __SH4_NOFPU__
2955 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2957 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2959 #ifndef __SH4_NOFPU__
/* FPU build (__SH4_NOFPU__ undefined): GCC_pop_shmedia_regs reloads the
   14 even-numbered double registers dr36 .. dr62 from offsets
   27*8 .. 40*8 relative to r15, i.e. from the 14 slots that lie above a
   27-slot area.  */
2960 .global GLOBAL(GCC_pop_shmedia_regs)
2961 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2962 GLOBAL(GCC_pop_shmedia_regs):
2965 fld.d r15, 40*8, dr62
2966 fld.d r15, 39*8, dr60
2967 fld.d r15, 38*8, dr58
2968 fld.d r15, 37*8, dr56
2969 fld.d r15, 36*8, dr54
2970 fld.d r15, 35*8, dr52
2971 fld.d r15, 34*8, dr50
2972 fld.d r15, 33*8, dr48
2973 fld.d r15, 32*8, dr46
2974 fld.d r15, 31*8, dr44
2975 fld.d r15, 30*8, dr42
2976 fld.d r15, 29*8, dr40
2977 fld.d r15, 28*8, dr38
2978 fld.d r15, 27*8, dr36
2980 #else /* ! __SH4_NOFPU__ */
2981 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2982 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2983 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2984 #endif /* ! __SH4_NOFPU__ */
3021 #ifndef __SH4_NOFPU__
3022 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3024 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3026 #endif /* __SH5__ == 32 */
3027 #endif /* L_push_pop_shmedia_regs */
3031 #if defined(__pic__) && defined(__SHMEDIA__)
/* sdivsi3 for SHmedia PIC builds.  The visible code normalizes r5, uses
   the normalized value to index a lookup table (byte and word loads via
   r20/r21), refines the looked-up inverse with multiplications, and then
   multiplies by r4.  r4 and r5 are presumably the dividend and the
   divisor respectively -- confirm against the calling convention.  The
   fixed-point format of each intermediate is given in the per-line
   comments.  */
3032 .global GLOBAL(sdivsi3)
3033 FUNC(GLOBAL(sdivsi3))
3035 .section .text..SHmedia32,"ax"
3040 /* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3041 in a text section does not work (at least for shared libraries):
3042 the linker sets the LSB of the address as if this was SHmedia code. */
3043 #define TEXT_DATA_BUG
3047 // clobbered: r1,r18,r19,r20,r21,r25,tr0
/* NOTE(review): GLOBAL(sdivsi3) was already declared .global at the top
   of this block, so the directive below is redundant.  */
3049 .global GLOBAL(sdivsi3)
/* tr0 is pointed at the division table: the file-local copy when
   TEXT_DATA_BUG is defined, otherwise the div_table_internal alias.  */
3051 #ifdef TEXT_DATA_BUG
3052 ptb datalabel Local_div_table,tr0
3054 ptb GLOBAL(div_table_internal),tr0
3057 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3058 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3061 ldx.ub r20, r21, r19 // u0.8
3062 shari r25, 32, r25 // normalize to s2.30
3064 muls.l r25, r19, r19 // s2.38
3065 ldx.w r20, r21, r21 // s2.14
3067 shari r19, 24, r19 // truncate to s2.14
3068 sub r21, r19, r19 // some 11 bit inverse in s1.14
3069 muls.l r19, r19, r21 // u0.28
3072 muls.l r25, r21, r18 // s2.58
3073 shlli r19, 45, r19 // multiply by two and convert to s2.58
3076 shari r18, 28, r18 // some 22 bit inverse in s1.30
3077 muls.l r18, r25, r0 // s2.60
3078 muls.l r18, r4, r25 // s32.30
3080 shari r0, 16, r19 // s-16.44
3081 muls.l r19, r18, r19 // s-16.74
3083 shari r4, 14, r18 // s19.-14
3084 shari r19, 30, r19 // s-16.44
3085 muls.l r19, r18, r19 // s15.30
3086 xor r21, r0, r21 // You could also use the constant 1 << 27.
3092 ENDFUNC(GLOBAL(sdivsi3))
3093 /* This table has been generated by divtab.c .
3094 Defects for bias -330:
3095 Max defect: 6.081536e-07 at -1.000000e+00
3096 Min defect: 2.849516e-08 at 1.030651e+00
3097 Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3098 Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3099 Defect at 1: 1.238659e-07
3100 Defect at -2: 1.061708e-07 */
3101 #else /* ! __pic__ || ! __SHMEDIA__ */
3103 #endif /* __pic__ */
3104 #if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
/* Local_div_table: an object symbol declared with a size of 128 bytes.
   It is only emitted when TEXT_DATA_BUG is defined for SHmedia PIC
   builds.  */
3106 .type Local_div_table,@object
3107 .size Local_div_table,128
3108 /* negative division constants */
3125 /* negative division factors */
3145 /* positive division factors */
3162 /* positive division constants */
3180 #endif /* TEXT_DATA_BUG */
/* GLOBAL(div_table): the exported table, also declared as a 128-byte
   object.  div_table_internal is set up as an alias for it through
   HIDDEN_ALIAS.  */
3182 .type GLOBAL(div_table),@object
3183 .size GLOBAL(div_table),128
3184 /* negative division constants */
3201 /* negative division factors */
3219 .global GLOBAL(div_table)
3221 HIDDEN_ALIAS(div_table_internal,div_table)
3223 /* positive division factors */
3240 /* positive division constants */
3258 #elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3259 /* This code uses shld, and thus is not suitable for SH1 / SH2. */
3261 /* Signed / unsigned division without use of FPU, optimized for SH4.
3262 Uses a lookup table for divisors in the range -128 .. +128, and
3263 div1 with case distinction for larger divisors in three more ranges.
3264 The code is lumped together with the table to allow the use of mova. */
3265 #ifdef __LITTLE_ENDIAN__
/* udivsi3_i4i: unsigned entry point of the FPU-less division code.  It
   shares the lookup tables and several local labels (div_le128_2,
   div_ge64k_2, div_ge64k_end, div_by_1, div_by_1_neg) with sdivsi3_i4i.
   r5 is the divisor operand of the div1 steps; r4 is presumably the
   dividend -- confirm against the calling convention.  */
3276 .global GLOBAL(udivsi3_i4i)
3277 FUNC(GLOBAL(udivsi3_i4i))
3278 GLOBAL(udivsi3_i4i):
/* r1 = 16-bit constant at c128_w (presumably 128, the table-lookup
   limit -- confirm at the definition of the constant).  */
3279 mov.w LOCAL(c128_w), r1
3285 bf LOCAL(udiv_le128)
3287 bf LOCAL(udiv_ge64k)
/* Each mova below puts the address of one of the lookup tables in r0.  */
3300 mova LOCAL(div_table_ix),r0
3301 bra LOCAL(div_le128_2)
3305 mova LOCAL(div_table_ix),r0
3309 mova LOCAL(div_table_inv),r0
3313 mova LOCAL(div_table_clz),r0
3316 bt/s LOCAL(div_by_1)
3327 LOCAL(div_by_1_neg):
3338 bra LOCAL(div_ge64k_2)
3350 mov.l LOCAL(zero_l),r1
3356 mov.w LOCAL(m256_w),r1
/* Byte store into the stack slot at r15, at the endian-dependent offset
   L_LSWMSB.  */
3358 mov.b r0,@(L_LSWMSB,r15)
3361 bra LOCAL(div_ge64k_end)
/* One rotate-and-div1 step with r5 as the divisor operand.  */
3383 rotcl r0; div1 r5,r1
3392 ENDFUNC(GLOBAL(udivsi3_i4i))
/* sdivsi3_i4i: signed entry point of the FPU-less division code.  The
   visible branches split on the sign of the divisor (pos_divisor) and of
   the result (neg_result / pos_result), and on the divisor's magnitude
   (div_le128_neg, div_ge64k, div_ge64k_neg, div_r8_neg).  The lookup
   tables it uses are placed at the end of the function.  */
3394 .global GLOBAL(sdivsi3_i4i)
3395 FUNC(GLOBAL(sdivsi3_i4i))
3396 /* This is link-compatible with a GLOBAL(sdivsi3) call,
3397 but we effectively clobber only r1. */
3398 GLOBAL(sdivsi3_i4i):
/* r1 = 16-bit constant at c128_w (presumably 128, the table-lookup
   limit -- confirm at the definition of the constant).  */
3401 mov.w LOCAL(c128_w), r1
3402 bt/s LOCAL(pos_divisor)
3406 bt/s LOCAL(neg_result)
3415 bf/s LOCAL(div_ge64k)
3423 mov.l LOCAL(zero_l),r1
/* Byte stores into the stack slot at r15, at the endian-dependent
   offsets L_MSWLSB and L_LSWMSB.  */
3430 mov.b r0,@(L_MSWLSB,r15)
3436 mov.b r0,@(L_LSWMSB,r15)
3437 LOCAL(div_ge64k_end):
3441 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3449 LOCAL(div_le128_neg):
/* Each mova below puts the address of one of the lookup tables in r0.  */
3451 mova LOCAL(div_table_ix),r0
3453 mova LOCAL(div_table_inv),r0
3454 bt/s LOCAL(div_by_1_neg)
3456 mova LOCAL(div_table_clz),r0
3471 bt/s LOCAL(pos_result)
3476 bf LOCAL(div_le128_neg)
3480 bf/s LOCAL(div_ge64k_neg)
3483 mov.l LOCAL(zero_l),r1
3490 mov.b r0,@(L_MSWLSB,r15)
3496 mov.b r0,@(L_LSWMSB,r15)
3497 LOCAL(div_ge64k_neg_end):
3501 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3505 LOCAL(div_r8_neg_end):
3511 LOCAL(div_ge64k_neg):
3512 bt/s LOCAL(div_r8_neg)
3515 mov.l LOCAL(zero_l),r1
3521 mov.w LOCAL(m256_w),r1
3523 mov.b r0,@(L_LSWMSB,r15)
3526 bra LOCAL(div_ge64k_neg_end)
/* One rotate-and-div1 step with r5 as the divisor operand.  */
3539 rotcl r1; div1 r5,r0
3543 bra LOCAL(div_r8_neg_end)
3548 /* This table has been generated by divtab-sh4.c. */
3550 LOCAL(div_table_clz):
3679 /* Lookup table translating positive divisor to index into table of
3680 normalized inverse. N.B. the '0' entry is also the last entry of the
3681 previous table, and causes an unaligned access for division by zero. */
3682 LOCAL(div_table_ix):
3812 /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
3847 LOCAL(div_table_inv):
3880 /* maximum error: 0.987342 scaled: 0.921875*/
3882 ENDFUNC(GLOBAL(sdivsi3_i4i))
3883 #endif /* SH3 / SH4 */
3885 #endif /* L_div_table */
3887 #ifdef L_udiv_qrnnd_16
/* udiv_qrnnd_16: helper declared through HIDDEN_FUNC and exported with
   .global.  Its register usage is given by the comment below: results
   rn in r0 and qn in r1; inputs n1 in r0, n0 in r4, d in r5 and d1 in r6;
   r2 holds __m.  It presumably yields one 16-bit quotient digit for a
   udiv_qrnnd-style division -- confirm against its callers.  */
3889 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
3890 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
3891 /* n1 < d, but n1 might be larger than d1. */
3892 .global GLOBAL(udiv_qrnnd_16)
3894 GLOBAL(udiv_qrnnd_16):
3931 ENDFUNC(GLOBAL(udiv_qrnnd_16))
3932 #endif /* !__SHMEDIA__ */
3933 #endif /* L_udiv_qrnnd_16 */