1 /* Copyright (C) 1994-2013 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it
4 under the terms of the GNU General Public License as published by the
5 Free Software Foundation; either version 3, or (at your option) any
8 This file is distributed in the hope that it will be useful, but
9 WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 General Public License for more details.
13 Under Section 7 of GPL version 3, you are granted additional
14 permissions described in the GCC Runtime Library Exception, version
15 3.1, as published by the Free Software Foundation.
17 You should have received a copy of the GNU General Public License and
18 a copy of the GCC Runtime Library Exception along with this program;
19 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
20 <http://www.gnu.org/licenses/>. */
23 !! libgcc routines for the Renesas / SuperH SH CPUs.
24 !! Contributed by Steve Chamberlain.
27 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
28 !! recoded in assembly by Toshiyasu Morita
31 #if defined(__ELF__) && defined(__linux__)
32 .section .note.GNU-stack,"",%progbits
36 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37 ELF local label prefixes by J"orn Rennecke
40 #include "lib1funcs.h"
42 /* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
43 so it is more convenient to define NO_FPSCR_VALUES here than to
44 define it on the command line. */
45 #if defined __vxworks && defined __PIC__
46 #define NO_FPSCR_VALUES
51 .global GLOBAL(ashiftrt_r4_0)
52 .global GLOBAL(ashiftrt_r4_1)
53 .global GLOBAL(ashiftrt_r4_2)
54 .global GLOBAL(ashiftrt_r4_3)
55 .global GLOBAL(ashiftrt_r4_4)
56 .global GLOBAL(ashiftrt_r4_5)
57 .global GLOBAL(ashiftrt_r4_6)
58 .global GLOBAL(ashiftrt_r4_7)
59 .global GLOBAL(ashiftrt_r4_8)
60 .global GLOBAL(ashiftrt_r4_9)
61 .global GLOBAL(ashiftrt_r4_10)
62 .global GLOBAL(ashiftrt_r4_11)
63 .global GLOBAL(ashiftrt_r4_12)
64 .global GLOBAL(ashiftrt_r4_13)
65 .global GLOBAL(ashiftrt_r4_14)
66 .global GLOBAL(ashiftrt_r4_15)
67 .global GLOBAL(ashiftrt_r4_16)
68 .global GLOBAL(ashiftrt_r4_17)
69 .global GLOBAL(ashiftrt_r4_18)
70 .global GLOBAL(ashiftrt_r4_19)
71 .global GLOBAL(ashiftrt_r4_20)
72 .global GLOBAL(ashiftrt_r4_21)
73 .global GLOBAL(ashiftrt_r4_22)
74 .global GLOBAL(ashiftrt_r4_23)
75 .global GLOBAL(ashiftrt_r4_24)
76 .global GLOBAL(ashiftrt_r4_25)
77 .global GLOBAL(ashiftrt_r4_26)
78 .global GLOBAL(ashiftrt_r4_27)
79 .global GLOBAL(ashiftrt_r4_28)
80 .global GLOBAL(ashiftrt_r4_29)
81 .global GLOBAL(ashiftrt_r4_30)
82 .global GLOBAL(ashiftrt_r4_31)
83 .global GLOBAL(ashiftrt_r4_32)
85 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
86 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
87 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
88 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
89 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
90 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
91 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
92 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
93 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
94 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
95 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
96 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
97 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
98 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
99 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
100 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
101 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
102 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
103 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
104 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
105 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
106 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
107 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
108 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
109 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
110 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
111 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
112 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
113 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
114 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
115 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
116 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
117 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
120 GLOBAL(ashiftrt_r4_32):
121 GLOBAL(ashiftrt_r4_31):
126 GLOBAL(ashiftrt_r4_30):
128 GLOBAL(ashiftrt_r4_29):
130 GLOBAL(ashiftrt_r4_28):
132 GLOBAL(ashiftrt_r4_27):
134 GLOBAL(ashiftrt_r4_26):
136 GLOBAL(ashiftrt_r4_25):
138 GLOBAL(ashiftrt_r4_24):
144 GLOBAL(ashiftrt_r4_23):
146 GLOBAL(ashiftrt_r4_22):
148 GLOBAL(ashiftrt_r4_21):
150 GLOBAL(ashiftrt_r4_20):
152 GLOBAL(ashiftrt_r4_19):
154 GLOBAL(ashiftrt_r4_18):
156 GLOBAL(ashiftrt_r4_17):
158 GLOBAL(ashiftrt_r4_16):
163 GLOBAL(ashiftrt_r4_15):
165 GLOBAL(ashiftrt_r4_14):
167 GLOBAL(ashiftrt_r4_13):
169 GLOBAL(ashiftrt_r4_12):
171 GLOBAL(ashiftrt_r4_11):
173 GLOBAL(ashiftrt_r4_10):
175 GLOBAL(ashiftrt_r4_9):
177 GLOBAL(ashiftrt_r4_8):
179 GLOBAL(ashiftrt_r4_7):
181 GLOBAL(ashiftrt_r4_6):
183 GLOBAL(ashiftrt_r4_5):
185 GLOBAL(ashiftrt_r4_4):
187 GLOBAL(ashiftrt_r4_3):
189 GLOBAL(ashiftrt_r4_2):
191 GLOBAL(ashiftrt_r4_1):
195 GLOBAL(ashiftrt_r4_0):
199 ENDFUNC(GLOBAL(ashiftrt_r4_0))
200 ENDFUNC(GLOBAL(ashiftrt_r4_1))
201 ENDFUNC(GLOBAL(ashiftrt_r4_2))
202 ENDFUNC(GLOBAL(ashiftrt_r4_3))
203 ENDFUNC(GLOBAL(ashiftrt_r4_4))
204 ENDFUNC(GLOBAL(ashiftrt_r4_5))
205 ENDFUNC(GLOBAL(ashiftrt_r4_6))
206 ENDFUNC(GLOBAL(ashiftrt_r4_7))
207 ENDFUNC(GLOBAL(ashiftrt_r4_8))
208 ENDFUNC(GLOBAL(ashiftrt_r4_9))
209 ENDFUNC(GLOBAL(ashiftrt_r4_10))
210 ENDFUNC(GLOBAL(ashiftrt_r4_11))
211 ENDFUNC(GLOBAL(ashiftrt_r4_12))
212 ENDFUNC(GLOBAL(ashiftrt_r4_13))
213 ENDFUNC(GLOBAL(ashiftrt_r4_14))
214 ENDFUNC(GLOBAL(ashiftrt_r4_15))
215 ENDFUNC(GLOBAL(ashiftrt_r4_16))
216 ENDFUNC(GLOBAL(ashiftrt_r4_17))
217 ENDFUNC(GLOBAL(ashiftrt_r4_18))
218 ENDFUNC(GLOBAL(ashiftrt_r4_19))
219 ENDFUNC(GLOBAL(ashiftrt_r4_20))
220 ENDFUNC(GLOBAL(ashiftrt_r4_21))
221 ENDFUNC(GLOBAL(ashiftrt_r4_22))
222 ENDFUNC(GLOBAL(ashiftrt_r4_23))
223 ENDFUNC(GLOBAL(ashiftrt_r4_24))
224 ENDFUNC(GLOBAL(ashiftrt_r4_25))
225 ENDFUNC(GLOBAL(ashiftrt_r4_26))
226 ENDFUNC(GLOBAL(ashiftrt_r4_27))
227 ENDFUNC(GLOBAL(ashiftrt_r4_28))
228 ENDFUNC(GLOBAL(ashiftrt_r4_29))
229 ENDFUNC(GLOBAL(ashiftrt_r4_30))
230 ENDFUNC(GLOBAL(ashiftrt_r4_31))
231 ENDFUNC(GLOBAL(ashiftrt_r4_32))
253 .global GLOBAL(ashrsi3)
254 HIDDEN_FUNC(GLOBAL(ashrsi3))
259 mova LOCAL(ashrsi3_table),r0
270 LOCAL(ashrsi3_table): /* 32 one-byte entries: entry N = LOCAL(ashrsi3_N) - LOCAL(ashrsi3_table), so every target must lie within byte range of this label. */
271 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
272 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
273 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
274 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
275 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
276 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
277 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
278 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
279 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
280 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
281 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
282 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
283 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
284 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
285 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
286 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
287 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
288 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
300 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
301 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
302 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
382 ENDFUNC(GLOBAL(ashrsi3))
389 ! (For compatibility with older binaries, not used by compiler)
414 .global GLOBAL(ashlsi3)
415 .global GLOBAL(ashlsi3_r0)
416 HIDDEN_FUNC(GLOBAL(ashlsi3))
417 HIDDEN_FUNC(GLOBAL(ashlsi3_r0))
428 mova LOCAL(ashlsi3_table),r0
441 LOCAL(ashlsi3_table):
447 LOCAL(ashlsi_2): // << 2
450 bra LOCAL(ashlsi_1) // << 3
452 bra LOCAL(ashlsi_2) // << 4
454 bra LOCAL(ashlsi_5) // << 5
456 bra LOCAL(ashlsi_6) // << 6
458 bra LOCAL(ashlsi_7) // << 7
460 LOCAL(ashlsi_8): // << 8
463 bra LOCAL(ashlsi_8) // << 9
465 bra LOCAL(ashlsi_8) // << 10
467 bra LOCAL(ashlsi_11) // << 11
469 bra LOCAL(ashlsi_12) // << 12
471 bra LOCAL(ashlsi_13) // << 13
473 bra LOCAL(ashlsi_14) // << 14
475 bra LOCAL(ashlsi_15) // << 15
477 LOCAL(ashlsi_16): // << 16
480 bra LOCAL(ashlsi_16) // << 17
482 bra LOCAL(ashlsi_16) // << 18
484 bra LOCAL(ashlsi_19) // << 19
486 bra LOCAL(ashlsi_20) // << 20
488 bra LOCAL(ashlsi_21) // << 21
490 bra LOCAL(ashlsi_22) // << 22
492 bra LOCAL(ashlsi_23) // << 23
494 bra LOCAL(ashlsi_16) // << 24
496 bra LOCAL(ashlsi_25) // << 25
498 bra LOCAL(ashlsi_26) // << 26
500 bra LOCAL(ashlsi_27) // << 27
502 bra LOCAL(ashlsi_28) // << 28
504 bra LOCAL(ashlsi_29) // << 29
506 bra LOCAL(ashlsi_30) // << 30
558 ENDFUNC(GLOBAL(ashlsi3))
559 ENDFUNC(GLOBAL(ashlsi3_r0))
566 ! (For compatibility with older binaries, not used by compiler)
591 .global GLOBAL(lshrsi3)
592 .global GLOBAL(lshrsi3_r0)
593 HIDDEN_FUNC(GLOBAL(lshrsi3))
594 HIDDEN_FUNC(GLOBAL(lshrsi3_r0))
605 mova LOCAL(lshrsi3_table),r0
617 LOCAL(lshrsi3_table):
620 LOCAL(lshrsi_1): // >> 1
623 LOCAL(lshrsi_2): // >> 2
626 bra LOCAL(lshrsi_1) // >> 3
628 bra LOCAL(lshrsi_2) // >> 4
630 bra LOCAL(lshrsi_5) // >> 5
632 bra LOCAL(lshrsi_6) // >> 6
634 bra LOCAL(lshrsi_7) // >> 7
636 LOCAL(lshrsi_8): // >> 8
639 bra LOCAL(lshrsi_8) // >> 9
641 bra LOCAL(lshrsi_8) // >> 10
643 bra LOCAL(lshrsi_11) // >> 11
645 bra LOCAL(lshrsi_12) // >> 12
647 bra LOCAL(lshrsi_13) // >> 13
649 bra LOCAL(lshrsi_14) // >> 14
651 bra LOCAL(lshrsi_15) // >> 15
653 LOCAL(lshrsi_16): // >> 16
656 bra LOCAL(lshrsi_16) // >> 17
658 bra LOCAL(lshrsi_16) // >> 18
660 bra LOCAL(lshrsi_19) // >> 19
662 bra LOCAL(lshrsi_20) // >> 20
664 bra LOCAL(lshrsi_21) // >> 21
666 bra LOCAL(lshrsi_22) // >> 22
668 bra LOCAL(lshrsi_23) // >> 23
670 bra LOCAL(lshrsi_16) // >> 24
672 bra LOCAL(lshrsi_25) // >> 25
674 bra LOCAL(lshrsi_26) // >> 26
676 bra LOCAL(lshrsi_27) // >> 27
678 bra LOCAL(lshrsi_28) // >> 28
680 bra LOCAL(lshrsi_29) // >> 29
682 bra LOCAL(lshrsi_30) // >> 30
734 ENDFUNC(GLOBAL(lshrsi3))
735 ENDFUNC(GLOBAL(lshrsi3_r0))
741 .global GLOBAL(movmem)
742 HIDDEN_FUNC(GLOBAL(movmem))
743 HIDDEN_ALIAS(movstr,movmem)
744 /* This would be a lot simpler if r6 contained the byte count
745 minus 64, and we wouldn't be called here for a byte count of 64. */
749 bsr GLOBAL(movmemSI52+2)
752 LOCAL(movmem_loop): /* Reached with rts */
758 bt LOCAL(movmem_done)
765 bt GLOBAL(movmemSI52)
766 ! done all the large groups, do the remainder
768 mova GLOBAL(movmemSI4)+4,r0
771 LOCAL(movmem_done): ! share slot insn, works out aligned.
778 ! ??? We need aliases movstr* for movmem* for the older libraries. These
779 ! aliases will be removed at some point in the future.
780 .global GLOBAL(movmemSI64)
781 HIDDEN_FUNC(GLOBAL(movmemSI64))
782 HIDDEN_ALIAS(movstrSI64,movmemSI64)
786 .global GLOBAL(movmemSI60)
787 HIDDEN_FUNC(GLOBAL(movmemSI60))
788 HIDDEN_ALIAS(movstrSI60,movmemSI60)
792 .global GLOBAL(movmemSI56)
793 HIDDEN_FUNC(GLOBAL(movmemSI56))
794 HIDDEN_ALIAS(movstrSI56,movmemSI56)
798 .global GLOBAL(movmemSI52)
799 HIDDEN_FUNC(GLOBAL(movmemSI52))
800 HIDDEN_ALIAS(movstrSI52,movmemSI52)
804 .global GLOBAL(movmemSI48)
805 HIDDEN_FUNC(GLOBAL(movmemSI48))
806 HIDDEN_ALIAS(movstrSI48,movmemSI48)
810 .global GLOBAL(movmemSI44)
811 HIDDEN_FUNC(GLOBAL(movmemSI44))
812 HIDDEN_ALIAS(movstrSI44,movmemSI44)
816 .global GLOBAL(movmemSI40)
817 HIDDEN_FUNC(GLOBAL(movmemSI40))
818 HIDDEN_ALIAS(movstrSI40,movmemSI40)
822 .global GLOBAL(movmemSI36)
823 HIDDEN_FUNC(GLOBAL(movmemSI36))
824 HIDDEN_ALIAS(movstrSI36,movmemSI36)
828 .global GLOBAL(movmemSI32)
829 HIDDEN_FUNC(GLOBAL(movmemSI32))
830 HIDDEN_ALIAS(movstrSI32,movmemSI32)
834 .global GLOBAL(movmemSI28)
835 HIDDEN_FUNC(GLOBAL(movmemSI28))
836 HIDDEN_ALIAS(movstrSI28,movmemSI28)
840 .global GLOBAL(movmemSI24)
841 HIDDEN_FUNC(GLOBAL(movmemSI24))
842 HIDDEN_ALIAS(movstrSI24,movmemSI24)
846 .global GLOBAL(movmemSI20)
847 HIDDEN_FUNC(GLOBAL(movmemSI20))
848 HIDDEN_ALIAS(movstrSI20,movmemSI20)
852 .global GLOBAL(movmemSI16)
853 HIDDEN_FUNC(GLOBAL(movmemSI16))
854 HIDDEN_ALIAS(movstrSI16,movmemSI16)
858 .global GLOBAL(movmemSI12)
859 HIDDEN_FUNC(GLOBAL(movmemSI12))
860 HIDDEN_ALIAS(movstrSI12,movmemSI12)
864 .global GLOBAL(movmemSI8)
865 HIDDEN_FUNC(GLOBAL(movmemSI8))
866 HIDDEN_ALIAS(movstrSI8,movmemSI8)
870 .global GLOBAL(movmemSI4)
871 HIDDEN_FUNC(GLOBAL(movmemSI4))
872 HIDDEN_ALIAS(movstrSI4,movmemSI4)
878 ENDFUNC(GLOBAL(movmemSI64))
879 ENDFUNC(GLOBAL(movmemSI60))
880 ENDFUNC(GLOBAL(movmemSI56))
881 ENDFUNC(GLOBAL(movmemSI52))
882 ENDFUNC(GLOBAL(movmemSI48))
883 ENDFUNC(GLOBAL(movmemSI44))
884 ENDFUNC(GLOBAL(movmemSI40))
885 ENDFUNC(GLOBAL(movmemSI36))
886 ENDFUNC(GLOBAL(movmemSI32))
887 ENDFUNC(GLOBAL(movmemSI28))
888 ENDFUNC(GLOBAL(movmemSI24))
889 ENDFUNC(GLOBAL(movmemSI20))
890 ENDFUNC(GLOBAL(movmemSI16))
891 ENDFUNC(GLOBAL(movmemSI12))
892 ENDFUNC(GLOBAL(movmemSI8))
893 ENDFUNC(GLOBAL(movmemSI4))
894 ENDFUNC(GLOBAL(movmem))
899 .global GLOBAL(movmem_i4_even)
900 .global GLOBAL(movmem_i4_odd)
901 .global GLOBAL(movmemSI12_i4)
903 HIDDEN_FUNC(GLOBAL(movmem_i4_even))
904 HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
905 HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
907 HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
908 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
909 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
919 GLOBAL(movmem_i4_even):
921 bra L_movmem_start_even
924 GLOBAL(movmem_i4_odd):
936 bt/s L_movmem_2mod4_end
950 ENDFUNC(GLOBAL(movmem_i4_even))
951 ENDFUNC(GLOBAL(movmem_i4_odd))
954 GLOBAL(movmemSI12_i4):
963 ENDFUNC(GLOBAL(movmemSI12_i4))
969 .global GLOBAL(mulsi3)
970 HIDDEN_FUNC(GLOBAL(mulsi3))
974 ! r0 = aabb*ccdd via partial products
976 ! if aa == 0 and cc == 0
980 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
984 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
985 mov r5,r3 ! r3 = ccdd
986 swap.w r4,r2 ! r2 = bbaa
987 xtrct r2,r3 ! r3 = aacc
988 tst r3,r3 ! msws zero ?
990 rts ! yes - then we have the answer
993 hiset: sts macl,r0 ! r0 = bb*dd
994 mulu.w r2,r5 ! brewing macl = aa*dd
996 mulu.w r3,r4 ! brewing macl = cc*bb
1003 ENDFUNC(GLOBAL(mulsi3))
1005 #endif /* ! __SH5__ */
1007 /*------------------------------------------------------------------------------
1008 32 bit signed integer division that uses FPU double precision division. */
1013 #if defined (__SH4__) || defined (__SH2A__)
1014 /* This variant is used when FPSCR.PR = 1 (double precision) is the default
1016 Args in r4 and r5, result in fpul, clobber dr0, dr2. */
1018 .global GLOBAL(sdivsi3_i4)
1019 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1029 ENDFUNC(GLOBAL(sdivsi3_i4))
1031 #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1032 /* This variant is used when FPSCR.PR = 0 (single precision) is the default
1034 Args in r4 and r5, result in fpul, clobber r2, dr0, dr2.
1035 For this to work, we must temporarily switch the FPU to double precision,
1036 but we had better not touch FPSCR.FR. See PR 6526. */
1038 #if ! __SH5__ || __SH5__ == 32
1042 .global GLOBAL(sdivsi3_i4)
1043 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1050 swap.w r3,r3 // r3 = 1 << 19 (FPSCR.PR bit)
1052 lds r3,fpscr // Set FPSCR.PR = 1.
1063 /* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit. */
1074 #endif /* __SH4A__ */
1076 ENDFUNC(GLOBAL(sdivsi3_i4))
1077 #endif /* ! __SH5__ || __SH5__ == 32 */
1078 #endif /* ! __SH4__ || __SH2A__ */
1079 #endif /* L_sdivsi3_i4 */
1081 //------------------------------------------------------------------------------
1083 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1086 !! Steve Chamberlain
1091 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1093 .global GLOBAL(sdivsi3)
1096 .section .text..SHmedia32,"ax"
1102 /* The assembly code that follows is a hand-optimized version of the C
1103 code that follows. Note that the registers that are modified are
1104 exactly those listed as clobbered in the patterns divsi3_i1 and
1107 int __sdivsi3 (i, j)
1110 register unsigned long long r18 asm ("r18");
1111 register unsigned long long r19 asm ("r19");
1112 register unsigned long long r0 asm ("r0") = 0;
1113 register unsigned long long r1 asm ("r1") = 1;
1114 register int r2 asm ("r2") = i >> 31;
1115 register int r3 asm ("r3") = j >> 31;
1127 r0 |= r1, r18 -= r19;
1128 while (r19 >>= 1, r1 >>= 1);
1130 return r2 * (int)r0;
1134 pt/l LOCAL(sdivsi3_dontadd), tr2
1135 pt/l LOCAL(sdivsi3_loop), tr1
1148 LOCAL(sdivsi3_loop):
1152 LOCAL(sdivsi3_dontadd):
1161 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1164 // can create absolute value without extra latency,
1165 // but dependent on proper sign extension of inputs:
1168 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1171 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1172 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1180 // If r4 was to be used in-place instead of r21, could use this sequence
1181 // to compute absolute:
1182 // sub r63,r4,r19 // compute absolute value of r4
1183 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1184 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1196 mmacnfx.wl r25,r2,r1
1221 #else /* ! 0 && ! 0 */
1224 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1226 HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1228 FUNC(GLOBAL(sdivsi3))
1229 GLOBAL(sdivsi3): /* this is the shcompact entry point */
1230 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1231 // with the SHcompact implementation, which clobbers tr1 / tr2.
1232 .global GLOBAL(sdivsi3_1)
1234 .global GLOBAL(div_table_internal)
1235 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1236 shori GLOBAL(div_table_internal) & 65535, r20
1238 .global GLOBAL(sdivsi3_2)
1240 // clobbered: r1,r18,r19,r21,r25,tr0
1243 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
1244 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
1245 ldx.ub r20, r21, r19 // u0.8
1246 shari r25, 32, r25 // normalize to s2.30
1248 muls.l r25, r19, r19 // s2.38
1249 ldx.w r20, r21, r21 // s2.14
1251 shari r19, 24, r19 // truncate to s2.14
1252 sub r21, r19, r19 // some 11 bit inverse in s1.14
1253 muls.l r19, r19, r21 // u0.28
1256 muls.l r25, r21, r18 // s2.58
1257 shlli r19, 45, r19 // multiply by two and convert to s2.58
1260 shari r18, 28, r18 // some 22 bit inverse in s1.30
1261 muls.l r18, r25, r0 // s2.60
1262 muls.l r18, r4, r25 // s32.30
1264 shari r0, 16, r19 // s-16.44
1265 muls.l r19, r18, r19 // s-16.74
1267 shari r4, 14, r18 // s19.-14
1268 shari r19, 30, r19 // s-16.44
1269 muls.l r19, r18, r19 // s15.30
1270 xor r21, r0, r21 // You could also use the constant 1 << 27.
1277 ENDFUNC(GLOBAL(sdivsi3))
1279 ENDFUNC(GLOBAL(sdivsi3_2))
1281 #elif defined __SHMEDIA__
1282 /* m5compact-nofpu */
1283 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1285 .section .text..SHmedia32,"ax"
1287 FUNC(GLOBAL(sdivsi3))
1289 pt/l LOCAL(sdivsi3_dontsub), tr0
1290 pt/l LOCAL(sdivsi3_loop), tr1
1302 LOCAL(sdivsi3_loop):
1306 LOCAL(sdivsi3_dontsub):
1312 ENDFUNC(GLOBAL(sdivsi3))
1313 #else /* ! __SHMEDIA__ */
1314 FUNC(GLOBAL(sdivsi3))
1399 ENDFUNC(GLOBAL(sdivsi3))
1400 #endif /* ! __SHMEDIA__ */
1401 #endif /* L_sdivsi3 */
1403 /*------------------------------------------------------------------------------
1404 32 bit unsigned integer division that uses FPU double precision division. */
1409 #if defined (__SH4__) || defined (__SH2A__)
1410 /* This variant is used when FPSCR.PR = 1 (double precision) is the default
1412 Args in r4 and r5, result in fpul,
1413 clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit */
1415 .global GLOBAL(udivsi3_i4)
1416 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1447 .align 3 // Make the double below 8 byte aligned.
1452 ENDFUNC(GLOBAL(udivsi3_i4))
1454 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__) && ! defined (__SH2A_NOFPU__)
1455 #if ! __SH5__ || __SH5__ == 32
1456 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1458 .global GLOBAL(udivsi3_i4)
1459 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1473 ENDFUNC(GLOBAL(udivsi3_i4))
1474 #endif /* ! __SH5__ || __SH5__ == 32 */
1476 #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1477 /* This variant is used when FPSCR.PR = 0 (single precision) is the default
1479 Args in r4 and r5, result in fpul,
1480 clobber r0, r1, r4, r5, dr0, dr2, dr4.
1481 For this to work, we must temporarily switch the FPU to double precision,
1482 but we had better not touch FPSCR.FR. See PR 6526. */
1484 .global GLOBAL(udivsi3_i4)
1485 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1492 rotr r1 // r1 = 1 << 31
1519 .align 3 // Make the double below 8 byte aligned.
1528 .long 0x180000 // FPSCR.PR = 1, FPSCR.SZ = 1
1530 .long 0x80000 // FPSCR.PR = 1
1536 /* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit.
1537 Although on SH4A fmovd usually works, it would require either additional
1538 two fschg instructions or an FPSCR push + pop. It's not worth the effort
1539 for loading only one double constant. */
1543 rotr r1 // r1 = 1 << 31
1569 #endif /* __SH4A__ */
1572 ENDFUNC(GLOBAL(udivsi3_i4))
1573 #endif /* ! __SH4__ */
1574 #endif /* L_udivsi3_i4 */
1577 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1580 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1581 .global GLOBAL(udivsi3)
1582 HIDDEN_FUNC(GLOBAL(udivsi3))
1586 .section .text..SHmedia32,"ax"
1592 /* The assembly code that follows is a hand-optimized version of the C
1593 code that follows. Note that the registers that are modified are
1594 exactly those listed as clobbered in the patterns udivsi3_i1 and
1601 register unsigned long long r0 asm ("r0") = 0;
1602 register unsigned long long r18 asm ("r18") = 1;
1603 register unsigned long long r4 asm ("r4") = i;
1604 register unsigned long long r19 asm ("r19") = j;
1610 r0 |= r18, r4 -= r19;
1611 while (r19 >>= 1, r18 >>= 1);
1617 pt/l LOCAL(udivsi3_dontadd), tr2
1618 pt/l LOCAL(udivsi3_loop), tr1
1626 LOCAL(udivsi3_loop):
1630 LOCAL(udivsi3_dontadd):
1638 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1644 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1646 mmulfx.w r21,r21,r19
1647 mshflo.w r21,r63,r21
1649 mmulfx.w r25,r19,r19
1653 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1654 before the msub.w, but we need a different value for
1655 r19 to keep errors under control. */
1657 mmulfx.w r19,r19,r19
1661 mmacnfx.wl r25,r19,r21
1686 #elif defined (__SHMEDIA__)
1687 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1688 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1689 So use a short shmedia loop. */
1690 // clobbered: r20,r21,r25,tr0,tr1,tr2
1692 .section .text..SHmedia32,"ax"
1695 pt/l LOCAL(udivsi3_dontsub), tr0
1696 pt/l LOCAL(udivsi3_loop), tr1
1701 LOCAL(udivsi3_loop):
1705 LOCAL(udivsi3_dontsub):
1710 #else /* ! defined (__SHMEDIA__) */
1714 div1 r5,r4; div1 r5,r4; div1 r5,r4
1715 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1718 div1 r5,r4; rotcl r0
1719 div1 r5,r4; rotcl r0
1720 div1 r5,r4; rotcl r0
1728 bf LOCAL(large_divisor)
1730 bf/s LOCAL(large_divisor)
1752 LOCAL(large_divisor):
1771 ENDFUNC(GLOBAL(udivsi3))
1772 #endif /* ! __SHMEDIA__ */
1773 #endif /* L_udivsi3 */
1778 .section .text..SHmedia32,"ax"
1780 .global GLOBAL(udivdi3)
1781 FUNC(GLOBAL(udivdi3))
1783 HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1788 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1792 sub r63,r22,r20 // r63 == 64 % 64
1794 pta LOCAL(large_divisor),tr0
1800 bgt/u r9,r63,tr0 // large_divisor
1809 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1810 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1811 the case may be, %0000000000000000 000.11111111111, still */
1812 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1817 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1819 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1828 mcmpgt.l r21,r63,r21 // See Note 1
1830 mshfhi.l r63,r21,r21
1834 /* small divisor: need a third divide step */
1844 /* could test r3 here to check for divide by zero. */
1847 LOCAL(large_divisor):
1856 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1857 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1858 the case may be, %0000000000000000 000.11111111111, still */
1859 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1864 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1866 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1870 pta LOCAL(no_lo_adj),tr0
1877 bgtu/u r7,r25,tr0 // no_lo_adj
1883 /* large_divisor: only needs a few adjustments. */
1890 ENDFUNC(GLOBAL(udivdi3))
1891 /* Note 1: To shift the result of the second divide stage so that the result
1892 always fits into 32 bits, yet we still reduce the rest sufficiently
1893 would require a lot of instructions to do the shifts just right. Using
1894 the full 64 bit shift result to multiply with the divisor would require
1895 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1896 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1897 know that the rest after taking this partial result into account will
1898 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1899 upper 32 bits of the partial result are nonzero. */
1900 #endif /* __SHMEDIA__ */
1901 #endif /* L_udivdi3 */
1906 .section .text..SHmedia32,"ax"
1908 .global GLOBAL(divdi3)
1909 FUNC(GLOBAL(divdi3))
1911 pta GLOBAL(udivdi3_internal),tr0
1923 ENDFUNC(GLOBAL(divdi3))
1924 #endif /* __SHMEDIA__ */
1925 #endif /* L_divdi3 */
1930 .section .text..SHmedia32,"ax"
1932 .global GLOBAL(umoddi3)
1933 FUNC(GLOBAL(umoddi3))
1935 HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1940 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1944 sub r63,r22,r20 // r63 == 64 % 64
1946 pta LOCAL(large_divisor),tr0
1952 bgt/u r9,r63,tr0 // large_divisor
1961 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1962 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1963 the case may be, %0000000000000000 000.11111111111, still */
1964 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1969 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1971 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1976 /* bubble */ /* could test r3 here to check for divide by zero. */
1979 mcmpgt.l r21,r63,r21 // See Note 1
1981 mshfhi.l r63,r21,r21
1985 /* small divisor: need a third divide step */
1988 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1998 LOCAL(large_divisor):
2007 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
2008 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
2009 the case may be, %0000000000000000 000.11111111111, still */
2010 muls.l r1,r4,r4 /* leaving at least one sign bit. */
2015 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
2017 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
2021 pta LOCAL(no_lo_adj),tr0
2028 bgtu/u r7,r25,tr0 // no_lo_adj
2034 /* large_divisor: only needs a few adjustments. */
2043 ENDFUNC(GLOBAL(umoddi3))
2044 /* Note 1: To shift the result of the second divide stage so that the result
2045 always fits into 32 bits, yet we still reduce the rest sufficiently
2046 would require a lot of instructions to do the shifts just right. Using
2047 the full 64 bit shift result to multiply with the divisor would require
2048 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
2049 Fortunately, if the upper 32 bits of the shift result are nonzero, we
2050 know that the rest after taking this partial result into account will
2051 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
2052 upper 32 bits of the partial result are nonzero. */
2053 #endif /* __SHMEDIA__ */
2054 #endif /* L_umoddi3 */
2059 .section .text..SHmedia32,"ax"
2061 .global GLOBAL(moddi3)
2062 FUNC(GLOBAL(moddi3))
2064 pta GLOBAL(umoddi3_internal),tr0
2076 ENDFUNC(GLOBAL(moddi3))
2077 #endif /* __SHMEDIA__ */
2078 #endif /* L_moddi3 */
2081 #if !defined (__SH2A_NOFPU__)
2082 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
2086 .global GLOBAL(set_fpscr)
2087 HIDDEN_FUNC(GLOBAL(set_fpscr))
2093 mov.l LOCAL(set_fpscr_L0_base),r12
2094 mov.l LOCAL(set_fpscr_L0_index),r0
2098 mova LOCAL(set_fpscr_L0),r0
2099 mov.l LOCAL(set_fpscr_L0),r12
2102 mov.l LOCAL(set_fpscr_L1),r0
2106 mov.l LOCAL(set_fpscr_L1),r1
2113 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2116 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2125 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2129 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2137 LOCAL(set_fpscr_L0_base):
2138 .long ___GOTT_BASE__
2139 LOCAL(set_fpscr_L0_index):
2140 .long ___GOTT_INDEX__
2142 LOCAL(set_fpscr_L0):
2143 .long _GLOBAL_OFFSET_TABLE_
2145 LOCAL(set_fpscr_L1):
2146 .long GLOBAL(fpscr_values@GOT)
2148 LOCAL(set_fpscr_L1):
2149 .long GLOBAL(fpscr_values)
2152 ENDFUNC(GLOBAL(set_fpscr))
2153 #ifndef NO_FPSCR_VALUES
2155 .comm GLOBAL(fpscr_values),8,4
2157 .comm GLOBAL(fpscr_values),8
2159 #endif /* NO_FPSCR_VALUES */
2160 #endif /* SH2E / SH3E / SH4 */
2161 #endif /* __SH2A_NOFPU__ */
2162 #endif /* L_set_fpscr */
2163 #ifdef L_ic_invalidate
2166 .section .text..SHmedia32,"ax"
2168 .global GLOBAL(init_trampoline)
2169 HIDDEN_FUNC(GLOBAL(init_trampoline))
2170 GLOBAL(init_trampoline):
2172 #ifdef __LITTLE_ENDIAN__
2178 movi 0xffffffffffffd002,r20
2185 ENDFUNC(GLOBAL(init_trampoline))
2186 .global GLOBAL(ic_invalidate)
2187 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2188 GLOBAL(ic_invalidate):
2195 ENDFUNC(GLOBAL(ic_invalidate))
2196 #elif defined(__SH4A__)
2197 .global GLOBAL(ic_invalidate)
2198 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2199 GLOBAL(ic_invalidate):
2205 ENDFUNC(GLOBAL(ic_invalidate))
2206 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2207 /* For system code, we use ic_invalidate_line_i, but user code
2208 needs a different mechanism. A kernel call is generally not
2209 available, and it would also be slow. Different SH4 variants use
2210 different sizes and associativities of the Icache. We use a small
2211 bit of dispatch code that can be put hidden in every shared object,
2212 which calls the actual processor-specific invalidation code in a
2214 Or if you have operating system support, the OS could mmap the
2215 processor-specific code from a single page, since it is highly
2217 .global GLOBAL(ic_invalidate)
2218 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2219 GLOBAL(ic_invalidate):
2246 0: .long GLOBAL(ic_invalidate_array)
2248 .global GLOBAL(ic_invalidate_array)
2249 0: .long GLOBAL(ic_invalidate_array)@GOT
2251 1: .long ___GOTT_BASE__
2252 2: .long ___GOTT_INDEX__
2254 1: .long _GLOBAL_OFFSET_TABLE_
2256 ENDFUNC(GLOBAL(ic_invalidate))
2257 #endif /* __pic__ */
2259 #endif /* L_ic_invalidate */
2261 #ifdef L_ic_invalidate_array
2262 #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
2263 .global GLOBAL(ic_invalidate_array)
2264 /* This is needed when an SH4 dso with trampolines is used on SH4A. */
/* NOTE(review): the .global below repeats the directive issued two
   lines earlier for the same symbol.  */
2265 .global GLOBAL(ic_invalidate_array)
/* ic_invalidate_array, variant for __SH4A__ (or for an SH4 build with
   __FORCE_SH4A__ defined), per the #if condition above.  */
2266 FUNC(GLOBAL(ic_invalidate_array))
2267 GLOBAL(ic_invalidate_array):
2275 ENDFUNC(GLOBAL(ic_invalidate_array))
2276 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
/* ic_invalidate_array, variant for the remaining SH4 configurations.  */
2277 .global GLOBAL(ic_invalidate_array)
2279 FUNC(GLOBAL(ic_invalidate_array))
2280 /* This must be aligned to the beginning of a cache line. */
2281 GLOBAL(ic_invalidate_array):
/* WAY_SIZE feeds the repeat counts below; WAYS is defined elsewhere.  */
2284 #define WAY_SIZE 0x4000
/* Each .rept repeats its body WAY_SIZE * WAYS / 32 times.
   NOTE(review): 32 is presumably the cache line size in bytes -- confirm.  */
2287 .rept WAY_SIZE * WAYS / 32
2295 .rept WAY_SIZE * WAYS / 32
2309 #else /* WAYS > 6 */
2310 /* This variant needs two different pages for mmap-ing. */
2328 ENDFUNC(GLOBAL(ic_invalidate_array))
2330 #endif /* L_ic_invalidate_array */
2332 #if defined (__SH5__) && __SH5__ == 32
2333 #ifdef L_shcompact_call_trampoline
/* Dispatch table for GCC_shcompact_call_trampoline.  Each of the 33
   entries is a 16-bit offset of a handler label from ct_main_label:
   three entries (fp, ld, pop) for each of r2..r5, four entries
   (fph, fpl, ld, pop) for each of r6..r9, then five trailing entries.
   The trampoline loads the address of this table minus 31 * 2 into r0.  */
2336 LOCAL(ct_main_table):
/* r2..r5: copy from FP register, load from memory, pop from stack.  */
2337 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2338 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2339 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2340 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2341 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2342 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2343 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2344 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2345 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2346 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2347 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2348 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
/* r6..r9: high FP register, low FP register, load from memory, pop.  */
2349 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2350 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2351 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2352 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2353 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2354 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2355 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2356 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2357 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2358 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2359 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2360 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2361 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2362 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2363 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2364 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
/* Trailing entries: pop sequence (twice), r9 pop again, wide return,
   plain call.  */
2365 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2366 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2367 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2368 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2369 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
/* Switch to the SHmedia text section (allocatable, executable).  */
2371 .section .text..SHmedia32, "ax"
2374 /* This function loads 64-bit general-purpose registers from the
2375 stack, from a memory address contained in them or from an FP
2376 register, according to a cookie passed in r1. Its execution
2377 time is linear on the number of registers that actually have
2378 to be copied. See sh.h for details on the actual bit pattern.
2380 The function to be called is passed in r0. If a 32-bit return
2381 value is expected, the actual function will be tail-called,
2382 otherwise the return address will be stored in r10 (that the
2383 caller should expect to be clobbered) and the return value
2384 will be expanded into r2/r3 upon return. */
2386 .global GLOBAL(GCC_shcompact_call_trampoline)
2387 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2388 GLOBAL(GCC_shcompact_call_trampoline):
2389 ptabs/l r0, tr0 /* Prepare to call the actual function. */
/* Build the address (ct_main_table - 31 * 2) in r0: movi supplies the
   upper 16 bits, the later shori supplies the lower 16 bits.  */
2390 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
/* Point target register tr1 at ct_loop.  */
2391 pt/l LOCAL(ct_loop), tr1
2393 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
/* Reference point for the offsets stored in ct_main_table.  */
2398 LOCAL(ct_main_label):
2401 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2402 /* It must be dr0, so just do it. */
2408 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2409 /* It is either dr0 or dr2. */
2418 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
/* r33 = (cookie bits 23..24) * 8; r32 = r33 plus the distance from
   ct_r4_fp_base to ct_r4_fp_copy.  */
2419 shlri r1, 23 - 3, r34
2420 andi r34, 3 << 3, r33
2421 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2422 LOCAL(ct_r4_fp_base):
2428 LOCAL(ct_r4_fp_copy):
2435 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
/* Same selection as for r4, using cookie bits 20..21.  */
2436 shlri r1, 20 - 3, r34
2437 andi r34, 3 << 3, r33
2438 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2439 LOCAL(ct_r5_fp_base):
2445 LOCAL(ct_r5_fp_copy):
2454 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2455 /* It must be dr8. */
2461 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
/* Same selection as for r4, using cookie bits 16..17.  */
2462 shlri r1, 16 - 3, r34
2463 andi r34, 3 << 3, r33
2464 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2465 LOCAL(ct_r6_fp_base):
2471 LOCAL(ct_r6_fp_copy):
2480 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2481 /* It is either dr8 or dr10. */
2489 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
/* Same selection as for r4, using cookie bits 12..13.  */
2490 shlri r1, 12 - 3, r34
2491 andi r34, 3 << 3, r33
2492 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2493 LOCAL(ct_r7_fp_base):
2498 LOCAL(ct_r7_fp_copy):
2507 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2508 /* It is either dr8 or dr10. */
/* r32 = cookie bit 8, left in place (value 0 or 1 << 8).  */
2510 andi r1, 1 << 8, r32
2516 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
/* Same selection as for r4, using cookie bits 8..9.  */
2517 shlri r1, 8 - 3, r34
2518 andi r34, 3 << 3, r33
2519 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2520 LOCAL(ct_r8_fp_base):
2525 LOCAL(ct_r8_fp_copy):
2534 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2535 /* It is either dr8 or dr10. */
/* r32 = cookie bit 4, left in place (value 0 or 1 << 4).  */
2537 andi r1, 1 << 4, r32
2543 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
/* Same selection as for r4, using cookie bits 4..5.  */
2544 shlri r1, 4 - 3, r34
2545 andi r34, 3 << 3, r33
2546 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2547 LOCAL(ct_r9_fp_base):
2552 LOCAL(ct_r9_fp_copy):
/* Memory-load handlers: each first points tr2 at a per-register label.  */
2561 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2562 pt/l LOCAL(ct_r2_load), tr2
2571 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2572 pt/l LOCAL(ct_r3_load), tr2
2580 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2581 pt/l LOCAL(ct_r4_load), tr2
2589 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2590 pt/l LOCAL(ct_r5_load), tr2
2598 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2599 pt/l LOCAL(ct_r6_load), tr2
2606 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2607 pt/l LOCAL(ct_r7_load), tr2
2614 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2615 pt/l LOCAL(ct_r8_load), tr2
2622 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
/* Unlike r2..r8, the r9 case points tr2 at ct_check_tramp.  */
2623 pt/l LOCAL(ct_check_tramp), tr2
2647 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2654 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2661 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2668 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2675 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2682 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2688 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2694 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
/* r30 = cookie bits 1..3, left in place (a multiple of 2).  */
2695 andi r1, 7 << 1, r30
/* Build the address of ct_end_of_pop_seq in r32 (upper 16 bits via
   movi, lower 16 bits via shori).  */
2696 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2698 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2702 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2715 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2718 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2719 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
/* Point tr2 at ct_ret_wide.  */
2720 pt/u LOCAL(ct_ret_wide), tr2
2723 LOCAL(ct_call_func): /* Just branch to the function. */
2725 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2726 64-bit return value. */
/* NOTE(review): this tests the value of __LITTLE_ENDIAN__ with #if,
   whereas other parts of the file use #ifdef -- confirm both agree.  */
2730 #if __LITTLE_ENDIAN__
2739 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2740 #endif /* L_shcompact_call_trampoline */
2742 #ifdef L_shcompact_return_trampoline
2743 /* This function does the converse of the code in `ret_wide'
2744 above. It is tail-called by SHcompact functions returning
2745 64-bit non-floating-point values, to pack the 32-bit values in
2746 r2 and r3 into r2. */
/* Placed in the SHmedia text section (allocatable, executable).  */
2749 .section .text..SHmedia32, "ax"
2751 .global GLOBAL(GCC_shcompact_return_trampoline)
2752 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2753 GLOBAL(GCC_shcompact_return_trampoline):
/* The packing code is endianness-dependent.
   NOTE(review): uses #if rather than #ifdef on __LITTLE_ENDIAN__.  */
2755 #if __LITTLE_ENDIAN__
2765 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2766 #endif /* L_shcompact_return_trampoline */
2768 #ifdef L_shcompact_incoming_args
/* Dispatch table for GCC_shcompact_incoming_args.  It has 33 16-bit
   entries: either the offset of a handler label from ia_main_label, or
   the literal 1 for encodings marked invalid.  r2..r5 take three entries
   each (invalid, ld, push), r6..r9 take four each (invalid, invalid, ld,
   push), followed by five trailing entries.  The function loads the
   address of this table minus 31 * 2 into r43.  */
2771 LOCAL(ia_main_table):
2772 .word 1 /* Invalid, just loop */
2773 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2774 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2775 .word 1 /* Invalid, just loop */
2776 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2777 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2778 .word 1 /* Invalid, just loop */
2779 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2780 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2781 .word 1 /* Invalid, just loop */
2782 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2783 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2784 .word 1 /* Invalid, just loop */
2785 .word 1 /* Invalid, just loop */
2786 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2787 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2788 .word 1 /* Invalid, just loop */
2789 .word 1 /* Invalid, just loop */
2790 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2791 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2792 .word 1 /* Invalid, just loop */
2793 .word 1 /* Invalid, just loop */
2794 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2795 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2796 .word 1 /* Invalid, just loop */
2797 .word 1 /* Invalid, just loop */
2798 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2799 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
/* Trailing entries: push sequence (twice), r9 push again, return
   (twice).  */
2800 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2801 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2802 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2803 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2804 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
/* Switch to the SHmedia text section (allocatable, executable).  */
2806 .section .text..SHmedia32, "ax"
2809 /* This function stores 64-bit general-purpose registers back in
2810 the stack, and loads the address in which each register
2811 was stored into itself. The lower 32 bits of r17 hold the address
2812 to begin storing, and the upper 32 bits of r17 hold the cookie.
2813 Its execution time is linear on the
2814 number of registers that actually have to be copied, and it is
2815 optimized for structures larger than 64 bits, as opposed to
2816 individual `long long' arguments. See sh.h for details on the
2817 actual bit pattern. */
2819 .global GLOBAL(GCC_shcompact_incoming_args)
2820 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2821 GLOBAL(GCC_shcompact_incoming_args):
2822 ptabs/l r18, tr0 /* Prepare to return. */
2823 shlri r17, 32, r0 /* Load the cookie. */
/* Build the address (ia_main_table - 31 * 2) in r43: movi supplies the
   upper 16 bits, the later shori supplies the lower 16 bits.  */
2824 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
/* Point target register tr1 at ia_loop.  */
2825 pt/l LOCAL(ia_loop), tr1
2827 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
/* Reference point for the offsets stored in ia_main_table.  */
2832 LOCAL(ia_main_label):
2835 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2844 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2853 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2862 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2871 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2880 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2888 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2896 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2900 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2907 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2914 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2921 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2928 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2935 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2941 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2947 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
/* r38 = cookie bits 1..3, left in place (a multiple of 2).  */
2948 andi r0, 7 << 1, r38
/* Build the address of ia_end_of_push_seq in r40 (upper 16 bits via
   movi, lower 16 bits via shori).  */
2949 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2951 shori LOCAL(ia_end_of_push_seq) & 65535, r40
/* NOTE(review): the parallel label in the call trampoline is named
   ct_start_of_pop_seq; "stack" here may be a typo for "start".  No
   reference to this label is visible nearby -- confirm before renaming.  */
2955 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2968 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2970 LOCAL(ia_return): /* Return. */
2972 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2973 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2974 #endif /* L_shcompact_incoming_args */
2977 #ifdef L_nested_trampoline
/* GCC_nested_trampoline: hidden, globally named code placed in the
   SHmedia text section and aligned to 8 bytes (.align 3) because, per
   the comment on that line, it is copied in 8-byte units.  */
2979 .section .text..SHmedia32,"ax"
2983 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2984 .global GLOBAL(GCC_nested_trampoline)
2985 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2986 GLOBAL(GCC_nested_trampoline):
3003 ENDFUNC(GLOBAL(GCC_nested_trampoline))
3004 #endif /* L_nested_trampoline */
3005 #endif /* __SH5__ */
3007 #ifdef L_push_pop_shmedia_regs
/* Register save/restore helpers.  With an FPU the entry points are
   GCC_push_shmedia_regs / GCC_pop_shmedia_regs; when __SH4_NOFPU__ is
   defined only the *_nofpu entry points are emitted.  */
3008 .section .text..SHmedia32,"ax"
3011 #ifndef __SH4_NOFPU__
3012 .global GLOBAL(GCC_push_shmedia_regs)
3013 FUNC(GLOBAL(GCC_push_shmedia_regs))
3014 GLOBAL(GCC_push_shmedia_regs):
/* Lower r15 by 14 * 8 bytes, then store the 14 double registers
   dr36, dr38, ..., dr62 at offsets 0*8 .. 13*8 from r15.  */
3015 addi.l r15, -14*8, r15
3016 fst.d r15, 13*8, dr62
3017 fst.d r15, 12*8, dr60
3018 fst.d r15, 11*8, dr58
3019 fst.d r15, 10*8, dr56
3020 fst.d r15, 9*8, dr54
3021 fst.d r15, 8*8, dr52
3022 fst.d r15, 7*8, dr50
3023 fst.d r15, 6*8, dr48
3024 fst.d r15, 5*8, dr46
3025 fst.d r15, 4*8, dr44
3026 fst.d r15, 3*8, dr42
3027 fst.d r15, 2*8, dr40
3028 fst.d r15, 1*8, dr38
3029 fst.d r15, 0*8, dr36
3030 #else /* ! __SH4_NOFPU__ */
3031 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
3032 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
3033 GLOBAL(GCC_push_shmedia_regs_nofpu):
3034 #endif /* ! __SH4_NOFPU__ */
/* Common to both entry points: lower r15 by a further 27 * 8 bytes.  */
3036 addi.l r15, -27*8, r15
3068 #ifndef __SH4_NOFPU__
3069 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
3071 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
3073 #ifndef __SH4_NOFPU__
3074 .global GLOBAL(GCC_pop_shmedia_regs)
3075 FUNC(GLOBAL(GCC_pop_shmedia_regs))
3076 GLOBAL(GCC_pop_shmedia_regs):
/* Reload dr36 .. dr62 from offsets 27*8 .. 40*8, i.e. from just above
   the 27-slot area that the push routine allocates after storing them.  */
3079 fld.d r15, 40*8, dr62
3080 fld.d r15, 39*8, dr60
3081 fld.d r15, 38*8, dr58
3082 fld.d r15, 37*8, dr56
3083 fld.d r15, 36*8, dr54
3084 fld.d r15, 35*8, dr52
3085 fld.d r15, 34*8, dr50
3086 fld.d r15, 33*8, dr48
3087 fld.d r15, 32*8, dr46
3088 fld.d r15, 31*8, dr44
3089 fld.d r15, 30*8, dr42
3090 fld.d r15, 29*8, dr40
3091 fld.d r15, 28*8, dr38
3092 fld.d r15, 27*8, dr36
3094 #else /* ! __SH4_NOFPU__ */
3095 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
3096 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3097 GLOBAL(GCC_pop_shmedia_regs_nofpu):
3098 #endif /* ! __SH4_NOFPU__ */
3135 #ifndef __SH4_NOFPU__
3136 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3138 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
/* NOTE(review): the labels on the two #endif lines below appear to be in
   the opposite order from the nesting (L_push_pop_shmedia_regs is the
   innermost #ifdef opened above) -- confirm against the opening lines.  */
3140 #endif /* __SH5__ == 32 */
3141 #endif /* L_push_pop_shmedia_regs */
/* PIC SHmedia build: a copy of sdivsi3 is emitted next to the division
   tables.  */
3145 #if defined(__pic__) && defined(__SHMEDIA__)
3146 .global GLOBAL(sdivsi3)
3147 FUNC(GLOBAL(sdivsi3))
3149 .section .text..SHmedia32,"ax"
3154 /* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3155 in a text section does not work (at least for shared libraries):
3156 the linker sets the LSB of the address as if this was SHmedia code. */
/* Workaround switch for the problem described above: when it is defined,
   the code refers to the private copy Local_div_table.  */
3157 #define TEXT_DATA_BUG
3161 // clobbered: r1,r18,r19,r20,r21,r25,tr0
/* NOTE(review): this .global repeats the directive already issued for
   sdivsi3 at the top of this #if branch.  */
3163 .global GLOBAL(sdivsi3)
/* tr0 = address of the division table; which label is used depends on
   TEXT_DATA_BUG.  */
3165 #ifdef TEXT_DATA_BUG
3166 ptb datalabel Local_div_table,tr0
3168 ptb GLOBAL(div_table_internal),tr0
/* The trailing comments below track the fixed-point format of each
   intermediate value.  */
3171 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3172 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3175 ldx.ub r20, r21, r19 // u0.8
3176 shari r25, 32, r25 // normalize to s2.30
3178 muls.l r25, r19, r19 // s2.38
3179 ldx.w r20, r21, r21 // s2.14
3181 shari r19, 24, r19 // truncate to s2.14
3182 sub r21, r19, r19 // some 11 bit inverse in s1.14
3183 muls.l r19, r19, r21 // u0.28
3186 muls.l r25, r21, r18 // s2.58
3187 shlli r19, 45, r19 // multiply by two and convert to s2.58
3190 shari r18, 28, r18 // some 22 bit inverse in s1.30
3191 muls.l r18, r25, r0 // s2.60
3192 muls.l r18, r4, r25 // s32.30
3194 shari r0, 16, r19 // s-16.44
3195 muls.l r19, r18, r19 // s-16.74
3197 shari r4, 14, r18 // s19.-14
3198 shari r19, 30, r19 // s-16.44
3199 muls.l r19, r18, r19 // s15.30
3200 xor r21, r0, r21 // You could also use the constant 1 << 27.
3206 ENDFUNC(GLOBAL(sdivsi3))
3207 /* This table has been generated by divtab.c .
3208 Defects for bias -330:
3209 Max defect: 6.081536e-07 at -1.000000e+00
3210 Min defect: 2.849516e-08 at 1.030651e+00
3211 Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3212 Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3213 Defect at 1: 1.238659e-07
3214 Defect at -2: 1.061708e-07 */
3215 #else /* ! __pic__ || ! __SHMEDIA__ */
3217 #endif /* __pic__ */
/* Private copy of the table, emitted only for the TEXT_DATA_BUG
   workaround in PIC SHmedia builds; declared as a 128-byte object.  */
3218 #if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
3220 .type Local_div_table,@object
3221 .size Local_div_table,128
3222 /* negative division constants */
3239 /* negative division factors */
3259 /* positive division factors */
3276 /* positive division constants */
3294 #endif /* TEXT_DATA_BUG */
/* Exported table div_table, also declared as a 128-byte object.  */
3296 .type GLOBAL(div_table),@object
3297 .size GLOBAL(div_table),128
3298 /* negative division constants */
3315 /* negative division factors */
/* The exported symbol is placed here, after the negative entries and
   before the positive ones; div_table_internal is a hidden alias for it.  */
3333 .global GLOBAL(div_table)
3335 HIDDEN_ALIAS(div_table_internal,div_table)
3337 /* positive division factors */
3354 /* positive division constants */
3372 #elif defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3373 /* This code uses shld, thus is not suitable for SH1 / SH2. */
3375 /* Signed / unsigned division without use of FPU, optimized for SH4.
3376 Uses a lookup table for divisors in the range -128 .. +128, and
3377 div1 with case distinction for larger divisors in three more ranges.
3378 The code is lumped together with the table to allow the use of mova. */
/* Endianness-dependent definitions follow (presumably the L_LSWMSB /
   L_MSWLSB stack offsets used by the byte stores below -- confirm).  */
3379 #ifdef __LITTLE_ENDIAN__
/* Unsigned entry point.  */
3390 .global GLOBAL(udivsi3_i4i)
3391 FUNC(GLOBAL(udivsi3_i4i))
3392 GLOBAL(udivsi3_i4i):
/* r1 = the 16-bit constant stored at c128_w.  */
3393 mov.w LOCAL(c128_w), r1
3399 bf LOCAL(udiv_le128)
3401 bf LOCAL(udiv_ge64k)
/* Each mova below loads the address of the named table into r0.  */
3414 mova LOCAL(div_table_ix),r0
3415 bra LOCAL(div_le128_2)
3419 mova LOCAL(div_table_ix),r0
3423 mova LOCAL(div_table_inv),r0
3427 mova LOCAL(div_table_clz),r0
3430 bt/s LOCAL(div_by_1)
3441 LOCAL(div_by_1_neg):
3452 bra LOCAL(div_ge64k_2)
3464 mov.l LOCAL(zero_l),r1
3470 mov.w LOCAL(m256_w),r1
/* Store the low byte of r0 at offset L_LSWMSB from r15.  */
3472 mov.b r0,@(L_LSWMSB,r15)
3475 bra LOCAL(div_ge64k_end)
/* One rotate-and-divide step: rotcl on r0 followed by div1 r5,r1.  */
3497 rotcl r0; div1 r5,r1
3506 ENDFUNC(GLOBAL(udivsi3_i4i))
/* Signed entry point.  */
3508 .global GLOBAL(sdivsi3_i4i)
3509 FUNC(GLOBAL(sdivsi3_i4i))
3510 /* This is link-compatible with a GLOBAL(sdivsi3) call,
3511 but we effectively clobber only r1. */
3512 GLOBAL(sdivsi3_i4i):
3515 mov.w LOCAL(c128_w), r1
3516 bt/s LOCAL(pos_divisor)
3520 bt/s LOCAL(neg_result)
3529 bf/s LOCAL(div_ge64k)
3537 mov.l LOCAL(zero_l),r1
3544 mov.b r0,@(L_MSWLSB,r15)
3550 mov.b r0,@(L_LSWMSB,r15)
3551 LOCAL(div_ge64k_end):
3555 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3563 LOCAL(div_le128_neg):
3565 mova LOCAL(div_table_ix),r0
3567 mova LOCAL(div_table_inv),r0
3568 bt/s LOCAL(div_by_1_neg)
3570 mova LOCAL(div_table_clz),r0
3585 bt/s LOCAL(pos_result)
3590 bf LOCAL(div_le128_neg)
3594 bf/s LOCAL(div_ge64k_neg)
3597 mov.l LOCAL(zero_l),r1
3604 mov.b r0,@(L_MSWLSB,r15)
3610 mov.b r0,@(L_LSWMSB,r15)
3611 LOCAL(div_ge64k_neg_end):
3615 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3619 LOCAL(div_r8_neg_end):
3625 LOCAL(div_ge64k_neg):
3626 bt/s LOCAL(div_r8_neg)
3629 mov.l LOCAL(zero_l),r1
3635 mov.w LOCAL(m256_w),r1
3637 mov.b r0,@(L_LSWMSB,r15)
3640 bra LOCAL(div_ge64k_neg_end)
/* Same step as in the unsigned path, with r0 and r1 swapped.  */
3653 rotcl r1; div1 r5,r0
3657 bra LOCAL(div_r8_neg_end)
3662 /* This table has been generated by divtab-sh4.c. */
/* Lookup tables addressed by the mova instructions above.  */
3664 LOCAL(div_table_clz):
3793 /* Lookup table translating positive divisor to index into table of
3794 normalized inverse. N.B. the '0' entry is also the last entry of the
3795 previous table, and causes an unaligned access for division by zero. */
3796 LOCAL(div_table_ix):
3926 /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
3961 LOCAL(div_table_inv):
3994 /* maximum error: 0.987342 scaled: 0.921875*/
3996 ENDFUNC(GLOBAL(sdivsi3_i4i))
3997 #endif /* SH3 / SH4 */
3999 #endif /* L_div_table */
4001 #ifdef L_udiv_qrnnd_16
/* udiv_qrnnd_16: hidden, globally named helper.  Its register interface
   is given by the comments below.
   NOTE(review): judging by the name, this is a 16-bit quotient/remainder
   division step -- confirm against the body.  */
4003 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
4004 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
4005 /* n1 < d, but n1 might be larger than d1. */
4006 .global GLOBAL(udiv_qrnnd_16)
4008 GLOBAL(udiv_qrnnd_16):
4045 ENDFUNC(GLOBAL(udiv_qrnnd_16))
4046 #endif /* !__SHMEDIA__ */
4047 #endif /* L_udiv_qrnnd_16 */