1 /* Copyright (C) 1994-2017 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it
4 under the terms of the GNU General Public License as published by the
5 Free Software Foundation; either version 3, or (at your option) any
8 This file is distributed in the hope that it will be useful, but
9 WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 General Public License for more details.
13 Under Section 7 of GPL version 3, you are granted additional
14 permissions described in the GCC Runtime Library Exception, version
15 3.1, as published by the Free Software Foundation.
17 You should have received a copy of the GNU General Public License and
18 a copy of the GCC Runtime Library Exception along with this program;
19 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
20 <http://www.gnu.org/licenses/>. */
23 !! libgcc routines for the Renesas / SuperH SH CPUs.
24 !! Contributed by Steve Chamberlain.
27 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
28 !! recoded in assembly by Toshiyasu Morita
/* On ELF Linux targets, emit an empty .note.GNU-stack section (empty flag
   string, so the section is neither writable nor executable).  GNU
   toolchains read this note as a declaration that the object does not
   need an executable stack.  */
31 #if defined(__ELF__) && defined(__linux__)
32 .section .note.GNU-stack,"",%progbits
36 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37 ELF local label prefixes by J"orn Rennecke
40 #include "lib1funcs.h"
42 /* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
43 so it is more convenient to define NO_FPSCR_VALUES here than to
44 define it on the command line. */
/* NO_FPSCR_VALUES is consumed by the "#ifndef NO_FPSCR_VALUES" guard that
   surrounds the .comm definition of fpscr_values.  */
45 #if defined __vxworks && defined __PIC__
46 #define NO_FPSCR_VALUES
/* ashiftrt_r4_0 .. ashiftrt_r4_32: 33 global entry points, one per shift
   count encoded in the symbol name.  NOTE(review): by name these shift r4
   right arithmetically by that count -- confirm against the instruction
   bodies.  */
50 .global GLOBAL(ashiftrt_r4_0)
51 .global GLOBAL(ashiftrt_r4_1)
52 .global GLOBAL(ashiftrt_r4_2)
53 .global GLOBAL(ashiftrt_r4_3)
54 .global GLOBAL(ashiftrt_r4_4)
55 .global GLOBAL(ashiftrt_r4_5)
56 .global GLOBAL(ashiftrt_r4_6)
57 .global GLOBAL(ashiftrt_r4_7)
58 .global GLOBAL(ashiftrt_r4_8)
59 .global GLOBAL(ashiftrt_r4_9)
60 .global GLOBAL(ashiftrt_r4_10)
61 .global GLOBAL(ashiftrt_r4_11)
62 .global GLOBAL(ashiftrt_r4_12)
63 .global GLOBAL(ashiftrt_r4_13)
64 .global GLOBAL(ashiftrt_r4_14)
65 .global GLOBAL(ashiftrt_r4_15)
66 .global GLOBAL(ashiftrt_r4_16)
67 .global GLOBAL(ashiftrt_r4_17)
68 .global GLOBAL(ashiftrt_r4_18)
69 .global GLOBAL(ashiftrt_r4_19)
70 .global GLOBAL(ashiftrt_r4_20)
71 .global GLOBAL(ashiftrt_r4_21)
72 .global GLOBAL(ashiftrt_r4_22)
73 .global GLOBAL(ashiftrt_r4_23)
74 .global GLOBAL(ashiftrt_r4_24)
75 .global GLOBAL(ashiftrt_r4_25)
76 .global GLOBAL(ashiftrt_r4_26)
77 .global GLOBAL(ashiftrt_r4_27)
78 .global GLOBAL(ashiftrt_r4_28)
79 .global GLOBAL(ashiftrt_r4_29)
80 .global GLOBAL(ashiftrt_r4_30)
81 .global GLOBAL(ashiftrt_r4_31)
82 .global GLOBAL(ashiftrt_r4_32)
/* Apply HIDDEN_FUNC to every entry point.  NOTE(review): the macro is not
   defined in this file (presumably it comes from lib1funcs.h and marks the
   symbol as a hidden function) -- confirm there.  */
84 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
85 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
86 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
87 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
88 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
89 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
90 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
91 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
92 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
93 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
94 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
95 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
96 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
97 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
98 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
99 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
100 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
101 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
102 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
103 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
104 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
105 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
106 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
107 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
108 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
109 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
110 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
111 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
112 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
113 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
114 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
115 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
116 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
/* Entry labels, ordered from the largest shift count (32) down to 0.  */
119 GLOBAL(ashiftrt_r4_32):
120 GLOBAL(ashiftrt_r4_31):
125 GLOBAL(ashiftrt_r4_30):
127 GLOBAL(ashiftrt_r4_29):
129 GLOBAL(ashiftrt_r4_28):
131 GLOBAL(ashiftrt_r4_27):
133 GLOBAL(ashiftrt_r4_26):
135 GLOBAL(ashiftrt_r4_25):
137 GLOBAL(ashiftrt_r4_24):
143 GLOBAL(ashiftrt_r4_23):
145 GLOBAL(ashiftrt_r4_22):
147 GLOBAL(ashiftrt_r4_21):
149 GLOBAL(ashiftrt_r4_20):
151 GLOBAL(ashiftrt_r4_19):
153 GLOBAL(ashiftrt_r4_18):
155 GLOBAL(ashiftrt_r4_17):
157 GLOBAL(ashiftrt_r4_16):
162 GLOBAL(ashiftrt_r4_15):
164 GLOBAL(ashiftrt_r4_14):
166 GLOBAL(ashiftrt_r4_13):
168 GLOBAL(ashiftrt_r4_12):
170 GLOBAL(ashiftrt_r4_11):
172 GLOBAL(ashiftrt_r4_10):
174 GLOBAL(ashiftrt_r4_9):
176 GLOBAL(ashiftrt_r4_8):
178 GLOBAL(ashiftrt_r4_7):
180 GLOBAL(ashiftrt_r4_6):
182 GLOBAL(ashiftrt_r4_5):
184 GLOBAL(ashiftrt_r4_4):
186 GLOBAL(ashiftrt_r4_3):
188 GLOBAL(ashiftrt_r4_2):
190 GLOBAL(ashiftrt_r4_1):
194 GLOBAL(ashiftrt_r4_0):
/* One ENDFUNC for each HIDDEN_FUNC above.  */
198 ENDFUNC(GLOBAL(ashiftrt_r4_0))
199 ENDFUNC(GLOBAL(ashiftrt_r4_1))
200 ENDFUNC(GLOBAL(ashiftrt_r4_2))
201 ENDFUNC(GLOBAL(ashiftrt_r4_3))
202 ENDFUNC(GLOBAL(ashiftrt_r4_4))
203 ENDFUNC(GLOBAL(ashiftrt_r4_5))
204 ENDFUNC(GLOBAL(ashiftrt_r4_6))
205 ENDFUNC(GLOBAL(ashiftrt_r4_7))
206 ENDFUNC(GLOBAL(ashiftrt_r4_8))
207 ENDFUNC(GLOBAL(ashiftrt_r4_9))
208 ENDFUNC(GLOBAL(ashiftrt_r4_10))
209 ENDFUNC(GLOBAL(ashiftrt_r4_11))
210 ENDFUNC(GLOBAL(ashiftrt_r4_12))
211 ENDFUNC(GLOBAL(ashiftrt_r4_13))
212 ENDFUNC(GLOBAL(ashiftrt_r4_14))
213 ENDFUNC(GLOBAL(ashiftrt_r4_15))
214 ENDFUNC(GLOBAL(ashiftrt_r4_16))
215 ENDFUNC(GLOBAL(ashiftrt_r4_17))
216 ENDFUNC(GLOBAL(ashiftrt_r4_18))
217 ENDFUNC(GLOBAL(ashiftrt_r4_19))
218 ENDFUNC(GLOBAL(ashiftrt_r4_20))
219 ENDFUNC(GLOBAL(ashiftrt_r4_21))
220 ENDFUNC(GLOBAL(ashiftrt_r4_22))
221 ENDFUNC(GLOBAL(ashiftrt_r4_23))
222 ENDFUNC(GLOBAL(ashiftrt_r4_24))
223 ENDFUNC(GLOBAL(ashiftrt_r4_25))
224 ENDFUNC(GLOBAL(ashiftrt_r4_26))
225 ENDFUNC(GLOBAL(ashiftrt_r4_27))
226 ENDFUNC(GLOBAL(ashiftrt_r4_28))
227 ENDFUNC(GLOBAL(ashiftrt_r4_29))
228 ENDFUNC(GLOBAL(ashiftrt_r4_30))
229 ENDFUNC(GLOBAL(ashiftrt_r4_31))
230 ENDFUNC(GLOBAL(ashiftrt_r4_32))
/* ashrsi3: global entry point declared with HIDDEN_FUNC.  NOTE(review): by
   name this is the variable-count arithmetic right shift -- confirm.  */
252 .global GLOBAL(ashrsi3)
253 HIDDEN_FUNC(GLOBAL(ashrsi3))
/* r0 = address of the dispatch table below (mova computes a PC-relative
   address and always writes it to r0).  */
258 mova LOCAL(ashrsi3_table),r0
/* Dispatch table with 32 one-byte entries: entry N holds the distance from
   ashrsi3_table to the handler LOCAL(ashrsi3_N), so every handler has to
   sit close enough to the table for that distance to fit in one byte.  */
269 LOCAL(ashrsi3_table):
270 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
271 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
272 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
273 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
274 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
275 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
276 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
277 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
278 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
279 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
280 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
281 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
282 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
283 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
284 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
285 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
286 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
287 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
288 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
300 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
301 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
381 ENDFUNC(GLOBAL(ashrsi3))
388 ! (For compatibility with older binaries, not used by compiler)
/* Two global entry points are declared with HIDDEN_FUNC: ashlsi3 and
   ashlsi3_r0.  NOTE(review): by name a variable-count left shift, the _r0
   variant presumably differing in which register carries the operand --
   confirm.  */
413 .global GLOBAL(ashlsi3)
414 .global GLOBAL(ashlsi3_r0)
415 HIDDEN_FUNC(GLOBAL(ashlsi3))
416 HIDDEN_FUNC(GLOBAL(ashlsi3_r0))
/* r0 = address of the jump table below (mova always writes r0).  */
427 mova LOCAL(ashlsi3_table),r0
/* Jump table.  The trailing "<< N" tag on each line names the shift count
   that slot implements.  Several slots branch into another slot's code
   rather than to a handler of their own: 3 -> ashlsi_1, 4 -> ashlsi_2,
   9 and 10 -> ashlsi_8, and 17, 18 and 24 -> ashlsi_16.  All remaining
   bra slots target a label named after their own count.  */
440 LOCAL(ashlsi3_table):
446 LOCAL(ashlsi_2): // << 2
449 bra LOCAL(ashlsi_1) // << 3
451 bra LOCAL(ashlsi_2) // << 4
453 bra LOCAL(ashlsi_5) // << 5
455 bra LOCAL(ashlsi_6) // << 6
457 bra LOCAL(ashlsi_7) // << 7
459 LOCAL(ashlsi_8): // << 8
462 bra LOCAL(ashlsi_8) // << 9
464 bra LOCAL(ashlsi_8) // << 10
466 bra LOCAL(ashlsi_11) // << 11
468 bra LOCAL(ashlsi_12) // << 12
470 bra LOCAL(ashlsi_13) // << 13
472 bra LOCAL(ashlsi_14) // << 14
474 bra LOCAL(ashlsi_15) // << 15
476 LOCAL(ashlsi_16): // << 16
479 bra LOCAL(ashlsi_16) // << 17
481 bra LOCAL(ashlsi_16) // << 18
483 bra LOCAL(ashlsi_19) // << 19
485 bra LOCAL(ashlsi_20) // << 20
487 bra LOCAL(ashlsi_21) // << 21
489 bra LOCAL(ashlsi_22) // << 22
491 bra LOCAL(ashlsi_23) // << 23
493 bra LOCAL(ashlsi_16) // << 24
495 bra LOCAL(ashlsi_25) // << 25
497 bra LOCAL(ashlsi_26) // << 26
499 bra LOCAL(ashlsi_27) // << 27
501 bra LOCAL(ashlsi_28) // << 28
503 bra LOCAL(ashlsi_29) // << 29
505 bra LOCAL(ashlsi_30) // << 30
557 ENDFUNC(GLOBAL(ashlsi3))
558 ENDFUNC(GLOBAL(ashlsi3_r0))
565 ! (For compatibility with older binaries, not used by compiler)
/* Two global entry points are declared with HIDDEN_FUNC: lshrsi3 and
   lshrsi3_r0.  NOTE(review): by name a variable-count logical right shift,
   the _r0 variant presumably differing in which register carries the
   operand -- confirm.  */
590 .global GLOBAL(lshrsi3)
591 .global GLOBAL(lshrsi3_r0)
592 HIDDEN_FUNC(GLOBAL(lshrsi3))
593 HIDDEN_FUNC(GLOBAL(lshrsi3_r0))
/* r0 = address of the jump table below (mova always writes r0).  */
604 mova LOCAL(lshrsi3_table),r0
/* Jump table.  The trailing ">> N" tag on each line names the shift count
   that slot implements.  Several slots branch into another slot's code
   rather than to a handler of their own: 3 -> lshrsi_1, 4 -> lshrsi_2,
   9 and 10 -> lshrsi_8, and 17, 18 and 24 -> lshrsi_16.  All remaining
   bra slots target a label named after their own count.  */
616 LOCAL(lshrsi3_table):
619 LOCAL(lshrsi_1): // >> 1
622 LOCAL(lshrsi_2): // >> 2
625 bra LOCAL(lshrsi_1) // >> 3
627 bra LOCAL(lshrsi_2) // >> 4
629 bra LOCAL(lshrsi_5) // >> 5
631 bra LOCAL(lshrsi_6) // >> 6
633 bra LOCAL(lshrsi_7) // >> 7
635 LOCAL(lshrsi_8): // >> 8
638 bra LOCAL(lshrsi_8) // >> 9
640 bra LOCAL(lshrsi_8) // >> 10
642 bra LOCAL(lshrsi_11) // >> 11
644 bra LOCAL(lshrsi_12) // >> 12
646 bra LOCAL(lshrsi_13) // >> 13
648 bra LOCAL(lshrsi_14) // >> 14
650 bra LOCAL(lshrsi_15) // >> 15
652 LOCAL(lshrsi_16): // >> 16
655 bra LOCAL(lshrsi_16) // >> 17
657 bra LOCAL(lshrsi_16) // >> 18
659 bra LOCAL(lshrsi_19) // >> 19
661 bra LOCAL(lshrsi_20) // >> 20
663 bra LOCAL(lshrsi_21) // >> 21
665 bra LOCAL(lshrsi_22) // >> 22
667 bra LOCAL(lshrsi_23) // >> 23
669 bra LOCAL(lshrsi_16) // >> 24
671 bra LOCAL(lshrsi_25) // >> 25
673 bra LOCAL(lshrsi_26) // >> 26
675 bra LOCAL(lshrsi_27) // >> 27
677 bra LOCAL(lshrsi_28) // >> 28
679 bra LOCAL(lshrsi_29) // >> 29
681 bra LOCAL(lshrsi_30) // >> 30
733 ENDFUNC(GLOBAL(lshrsi3))
734 ENDFUNC(GLOBAL(lshrsi3_r0))
/* movmem: global entry point declared with HIDDEN_FUNC; HIDDEN_ALIAS also
   provides the older name movstr for it.  */
740 .global GLOBAL(movmem)
741 HIDDEN_FUNC(GLOBAL(movmem))
742 HIDDEN_ALIAS(movstr,movmem)
743 /* This would be a lot simpler if r6 contained the byte count
744 minus 64, and we wouldn't be called here for a byte count of 64. */
/* Subroutine call to the address two bytes past the movmemSI52 entry
   point.  */
748 bsr GLOBAL(movmemSI52+2)
751 LOCAL(movmem_loop): /* Reached with rts */
757 bt LOCAL(movmem_done)
764 bt GLOBAL(movmemSI52)
765 ! done all the large groups, do the remainder
/* r0 = address four bytes past the movmemSI4 entry point.  */
767 mova GLOBAL(movmemSI4)+4,r0
770 LOCAL(movmem_done): ! share slot insn, works out aligned.
777 ! ??? We need aliases movstr* for movmem* for the older libraries. These
778 ! aliases will be removed at some point in the future.
/* movmemSI<N> entry points for N = 64, 60, ..., 4, each declared global
   with HIDDEN_FUNC and given a movstrSI<N> alias.  NOTE(review): by name
   each one handles a fixed N-byte block -- confirm against the bodies.  */
779 .global GLOBAL(movmemSI64)
780 HIDDEN_FUNC(GLOBAL(movmemSI64))
781 HIDDEN_ALIAS(movstrSI64,movmemSI64)
785 .global GLOBAL(movmemSI60)
786 HIDDEN_FUNC(GLOBAL(movmemSI60))
787 HIDDEN_ALIAS(movstrSI60,movmemSI60)
791 .global GLOBAL(movmemSI56)
792 HIDDEN_FUNC(GLOBAL(movmemSI56))
793 HIDDEN_ALIAS(movstrSI56,movmemSI56)
797 .global GLOBAL(movmemSI52)
798 HIDDEN_FUNC(GLOBAL(movmemSI52))
799 HIDDEN_ALIAS(movstrSI52,movmemSI52)
803 .global GLOBAL(movmemSI48)
804 HIDDEN_FUNC(GLOBAL(movmemSI48))
805 HIDDEN_ALIAS(movstrSI48,movmemSI48)
809 .global GLOBAL(movmemSI44)
810 HIDDEN_FUNC(GLOBAL(movmemSI44))
811 HIDDEN_ALIAS(movstrSI44,movmemSI44)
815 .global GLOBAL(movmemSI40)
816 HIDDEN_FUNC(GLOBAL(movmemSI40))
817 HIDDEN_ALIAS(movstrSI40,movmemSI40)
821 .global GLOBAL(movmemSI36)
822 HIDDEN_FUNC(GLOBAL(movmemSI36))
823 HIDDEN_ALIAS(movstrSI36,movmemSI36)
827 .global GLOBAL(movmemSI32)
828 HIDDEN_FUNC(GLOBAL(movmemSI32))
829 HIDDEN_ALIAS(movstrSI32,movmemSI32)
833 .global GLOBAL(movmemSI28)
834 HIDDEN_FUNC(GLOBAL(movmemSI28))
835 HIDDEN_ALIAS(movstrSI28,movmemSI28)
839 .global GLOBAL(movmemSI24)
840 HIDDEN_FUNC(GLOBAL(movmemSI24))
841 HIDDEN_ALIAS(movstrSI24,movmemSI24)
845 .global GLOBAL(movmemSI20)
846 HIDDEN_FUNC(GLOBAL(movmemSI20))
847 HIDDEN_ALIAS(movstrSI20,movmemSI20)
851 .global GLOBAL(movmemSI16)
852 HIDDEN_FUNC(GLOBAL(movmemSI16))
853 HIDDEN_ALIAS(movstrSI16,movmemSI16)
857 .global GLOBAL(movmemSI12)
858 HIDDEN_FUNC(GLOBAL(movmemSI12))
859 HIDDEN_ALIAS(movstrSI12,movmemSI12)
863 .global GLOBAL(movmemSI8)
864 HIDDEN_FUNC(GLOBAL(movmemSI8))
865 HIDDEN_ALIAS(movstrSI8,movmemSI8)
869 .global GLOBAL(movmemSI4)
870 HIDDEN_FUNC(GLOBAL(movmemSI4))
871 HIDDEN_ALIAS(movstrSI4,movmemSI4)
/* One ENDFUNC for each HIDDEN_FUNC above, movmem itself last.  */
877 ENDFUNC(GLOBAL(movmemSI64))
878 ENDFUNC(GLOBAL(movmemSI60))
879 ENDFUNC(GLOBAL(movmemSI56))
880 ENDFUNC(GLOBAL(movmemSI52))
881 ENDFUNC(GLOBAL(movmemSI48))
882 ENDFUNC(GLOBAL(movmemSI44))
883 ENDFUNC(GLOBAL(movmemSI40))
884 ENDFUNC(GLOBAL(movmemSI36))
885 ENDFUNC(GLOBAL(movmemSI32))
886 ENDFUNC(GLOBAL(movmemSI28))
887 ENDFUNC(GLOBAL(movmemSI24))
888 ENDFUNC(GLOBAL(movmemSI20))
889 ENDFUNC(GLOBAL(movmemSI16))
890 ENDFUNC(GLOBAL(movmemSI12))
891 ENDFUNC(GLOBAL(movmemSI8))
892 ENDFUNC(GLOBAL(movmemSI4))
893 ENDFUNC(GLOBAL(movmem))
/* Three global entry points -- movmem_i4_even, movmem_i4_odd and
   movmemSI12_i4 -- each declared with HIDDEN_FUNC and given a movstr*
   alias.  NOTE(review): the _even/_odd suffixes presumably refer to the
   parity of a transfer count -- confirm against the bodies.  */
898 .global GLOBAL(movmem_i4_even)
899 .global GLOBAL(movmem_i4_odd)
900 .global GLOBAL(movmemSI12_i4)
902 HIDDEN_FUNC(GLOBAL(movmem_i4_even))
903 HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
904 HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
906 HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
907 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
908 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
918 GLOBAL(movmem_i4_even):
/* The even entry branches unconditionally to L_movmem_start_even.  */
920 bra L_movmem_start_even
923 GLOBAL(movmem_i4_odd):
/* Delayed conditional branch: taken when the T bit is set.  */
935 bt/s L_movmem_2mod4_end
949 ENDFUNC(GLOBAL(movmem_i4_even))
950 ENDFUNC(GLOBAL(movmem_i4_odd))
953 GLOBAL(movmemSI12_i4):
962 ENDFUNC(GLOBAL(movmemSI12_i4))
/* mulsi3: global entry point declared with HIDDEN_FUNC.  Builds the 32-bit
   product from 16-bit mulu.w partial products.  In the notation below
   r4 = aabb and r5 = ccdd, each letter pair being one 16-bit half.  */
968 .global GLOBAL(mulsi3)
969 HIDDEN_FUNC(GLOBAL(mulsi3))
973 ! r0 = aabb*ccdd via partial products
975 ! if aa == 0 and cc == 0
979 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
983 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
984 mov r5,r3 ! r3 = ccdd
985 swap.w r4,r2 ! r2 = bbaa
986 xtrct r2,r3 ! r3 = aacc
987 tst r3,r3 ! msws zero ?
989 rts ! yes - then we have the answer
992 hiset: sts macl,r0 ! r0 = bb*dd
993 mulu.w r2,r5 ! brewing macl = aa*dd
995 mulu.w r3,r4 ! brewing macl = cc*bb
1002 ENDFUNC(GLOBAL(mulsi3))
1005 /*------------------------------------------------------------------------------
1006 32 bit signed integer division that uses FPU double precision division. */
/* Two alternative definitions of sdivsi3_i4 follow; the preprocessor
   selects one according to whether double or single precision is the
   target's default FPSCR.PR setting.  */
1011 #if defined (__SH4__) || defined (__SH2A__)
1012 /* This variant is used when FPSCR.PR = 1 (double precision) is the default
1014 Args in r4 and r5, result in fpul, clobber dr0, dr2. */
1016 .global GLOBAL(sdivsi3_i4)
1017 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1027 ENDFUNC(GLOBAL(sdivsi3_i4))
1029 #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1030 /* This variant is used when FPSCR.PR = 0 (single precision) is the default
1032 Args in r4 and r5, result in fpul, clobber r2, dr0, dr2.
1033 For this to work, we must temporarily switch the FPU to double precision,
1034 but we had better not touch FPSCR.FR. See PR 6526. */
1036 .global GLOBAL(sdivsi3_i4)
1037 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1044 swap.w r3,r3 // r3 = 1 << 19 (FPSCR.PR bit)
1046 lds r3,fpscr // Set FPSCR.PR = 1.
1057 /* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit. */
1068 #endif /* __SH4A__ */
1070 ENDFUNC(GLOBAL(sdivsi3_i4))
1071 #endif /* ! __SH4__ || __SH2A__ */
1072 #endif /* L_sdivsi3_i4 */
/* sdivsi3: global entry point.  It is declared with FUNC, whereas the
   sdivsi3_i4 variant uses HIDDEN_FUNC.  NOTE(review): by name the signed
   32-bit division that does not rely on the FPU -- confirm.  */
1074 //------------------------------------------------------------------------------
1076 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1079 !! Steve Chamberlain
1084 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1086 .global GLOBAL(sdivsi3)
1089 FUNC(GLOBAL(sdivsi3))
1174 ENDFUNC(GLOBAL(sdivsi3))
1175 #endif /* L_sdivsi3 */
1177 /*------------------------------------------------------------------------------
1178 32 bit unsigned integer division that uses FPU double precision division. */
/* Two alternative definitions of udivsi3_i4 follow; the preprocessor
   selects one according to whether double or single precision is the
   target's default FPSCR.PR setting.  */
1183 #if defined (__SH4__) || defined (__SH2A__)
1184 /* This variant is used when FPSCR.PR = 1 (double precision) is the default
1186 Args in r4 and r5, result in fpul,
1187 clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit */
1189 .global GLOBAL(udivsi3_i4)
1190 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1221 .align 3 // Make the double below 8 byte aligned.
1226 ENDFUNC(GLOBAL(udivsi3_i4))
1228 #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1229 /* This variant is used when FPSCR.PR = 0 (single precision) is the default
1231 Args in r4 and r5, result in fpul,
1232 clobber r0, r1, r4, r5, dr0, dr2, dr4.
1233 For this to work, we must temporarily switch the FPU to double precision,
1234 but we had better not touch FPSCR.FR. See PR 6526. */
1236 .global GLOBAL(udivsi3_i4)
1237 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1244 rotr r1 // r1 = 1 << 31
1271 .align 3 // Make the double below 8 byte aligned.
1280 .long 0x180000 // FPSCR.PR = 1, FPSCR.SZ = 1
1282 .long 0x80000 // FPSCR.PR = 1
1288 /* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit.
1289 Although on SH4A fmovd usually works, it would require either additional
1290 two fschg instructions or an FPSCR push + pop. It's not worth the effort
1291 for loading only one double constant. */
1295 rotr r1 // r1 = 1 << 31
1321 #endif /* __SH4A__ */
1324 ENDFUNC(GLOBAL(udivsi3_i4))
1325 #endif /* ! __SH4__ */
1326 #endif /* L_udivsi3_i4 */
/* udivsi3: global entry point declared with HIDDEN_FUNC.  The code is
   built from runs of "div1 r5,r4" steps, several per source line and
   separated by ';'.  On the "rts; div1 r5,r4" line the last div1 follows
   rts directly, i.e. it occupies the rts delay slot.  Two conditional
   branches (bf and bf/s) divert to LOCAL(large_divisor).  */
1329 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1332 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1333 .global GLOBAL(udivsi3)
1334 HIDDEN_FUNC(GLOBAL(udivsi3))
1339 div1 r5,r4; div1 r5,r4; div1 r5,r4
1340 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1343 div1 r5,r4; rotcl r0
1344 div1 r5,r4; rotcl r0
1345 div1 r5,r4; rotcl r0
1353 bf LOCAL(large_divisor)
1355 bf/s LOCAL(large_divisor)
1377 LOCAL(large_divisor):
1396 ENDFUNC(GLOBAL(udivsi3))
1397 #endif /* L_udivsi3 */
/* set_fpscr is only built when __SH2A_NOFPU__ is not defined and one of
   the FPU-equipped variants tested in the second condition is selected.  */
1400 #if !defined (__SH2A_NOFPU__)
1401 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
1402 .global GLOBAL(set_fpscr)
1403 HIDDEN_FUNC(GLOBAL(set_fpscr))
/* The loads below read the literal-pool words defined near the end of the
   function (set_fpscr_L0_base, set_fpscr_L0_index, set_fpscr_L0 and
   set_fpscr_L1).  */
1409 mov.l LOCAL(set_fpscr_L0_base),r12
1410 mov.l LOCAL(set_fpscr_L0_index),r0
1414 mova LOCAL(set_fpscr_L0),r0
1415 mov.l LOCAL(set_fpscr_L0),r12
1418 mov.l LOCAL(set_fpscr_L1),r0
1422 mov.l LOCAL(set_fpscr_L1),r1
1429 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1432 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1441 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1445 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
/* Literal pool.  set_fpscr_L1 is defined twice, once as a @GOT reference
   and once as the plain address of fpscr_values; the two are alternatives,
   since a label can only be defined once in any single build.  */
1453 LOCAL(set_fpscr_L0_base):
1454 .long ___GOTT_BASE__
1455 LOCAL(set_fpscr_L0_index):
1456 .long ___GOTT_INDEX__
1458 LOCAL(set_fpscr_L0):
1459 .long _GLOBAL_OFFSET_TABLE_
1461 LOCAL(set_fpscr_L1):
1462 .long GLOBAL(fpscr_values@GOT)
1464 LOCAL(set_fpscr_L1):
1465 .long GLOBAL(fpscr_values)
1468 ENDFUNC(GLOBAL(set_fpscr))
/* fpscr_values: an 8-byte common symbol, omitted when NO_FPSCR_VALUES is
   defined.  The first .comm form passes an extra third operand of 4.
   NOTE(review): presumably the ELF alignment request -- confirm which
   condition selects each form.  */
1469 #ifndef NO_FPSCR_VALUES
1471 .comm GLOBAL(fpscr_values),8,4
1473 .comm GLOBAL(fpscr_values),8
1475 #endif /* NO_FPSCR_VALUES */
1476 #endif /* SH2E / SH3E / SH4 */
1477 #endif /* __SH2A_NOFPU__ */
1478 #endif /* L_set_fpscr */
/* ic_invalidate: global entry point declared with HIDDEN_FUNC.  Two build
   variants exist: one for __SH4A__, and one for the other SH4
   configurations listed in the #elif, whose literal words reference
   ic_invalidate_array.  */
1479 #ifdef L_ic_invalidate
1481 #if defined(__SH4A__)
1482 .global GLOBAL(ic_invalidate)
1483 HIDDEN_FUNC(GLOBAL(ic_invalidate))
1484 GLOBAL(ic_invalidate):
1490 ENDFUNC(GLOBAL(ic_invalidate))
1491 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__)
1492 /* For system code, we use ic_invalidate_line_i, but user code
1493 needs a different mechanism. A kernel call is generally not
1494 available, and it would also be slow. Different SH4 variants use
1495 different sizes and associativities of the Icache. We use a small
1496 bit of dispatch code that can be put hidden in every shared object,
1497 which calls the actual processor-specific invalidation code in a
1499 Or if you have operating system support, the OS could mmap the
1500 processor-specific code from a single page, since it is highly
1502 .global GLOBAL(ic_invalidate)
1503 HIDDEN_FUNC(GLOBAL(ic_invalidate))
1504 GLOBAL(ic_invalidate):
1531 0: .long GLOBAL(ic_invalidate_array)
1533 .global GLOBAL(ic_invalidate_array)
1534 0: .long GLOBAL(ic_invalidate_array)@GOT
1536 1: .long ___GOTT_BASE__
1537 2: .long ___GOTT_INDEX__
1539 1: .long _GLOBAL_OFFSET_TABLE_
1541 ENDFUNC(GLOBAL(ic_invalidate))
1542 #endif /* __pic__ */
1544 #endif /* L_ic_invalidate */
/* ic_invalidate_array: global entry point declared with FUNC.  It has an
   SH4A variant (also selectable through __FORCE_SH4A__ on the other SH4
   configurations) and a plain SH4 variant.  */
1546 #ifdef L_ic_invalidate_array
1547 #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__)))
/* NOTE(review): the .global directive for ic_invalidate_array is issued
   twice in this variant (before and after the comment below); the second
   one is redundant.  */
1548 .global GLOBAL(ic_invalidate_array)
1549 /* This is needed when an SH4 dso with trampolines is used on SH4A. */
1550 .global GLOBAL(ic_invalidate_array)
1551 FUNC(GLOBAL(ic_invalidate_array))
1552 GLOBAL(ic_invalidate_array):
1560 ENDFUNC(GLOBAL(ic_invalidate_array))
1561 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__)
1562 .global GLOBAL(ic_invalidate_array)
1564 FUNC(GLOBAL(ic_invalidate_array))
1565 /* This must be aligned to the beginning of a cache line. */
1566 GLOBAL(ic_invalidate_array):
/* WAY_SIZE is 0x4000 bytes (16 KiB).  The .rept counts below divide
   WAY_SIZE * WAYS by 32.  NOTE(review): 32 is presumably the cache line
   size in bytes -- confirm.  */
1569 #define WAY_SIZE 0x4000
1572 .rept WAY_SIZE * WAYS / 32
1580 .rept WAY_SIZE * WAYS / 32
1594 #else /* WAYS > 6 */
1595 /* This variant needs two different pages for mmap-ing. */
1613 ENDFUNC(GLOBAL(ic_invalidate_array))
1615 #endif /* L_ic_invalidate_array */
/* Integer division without the FPU: the global entry points udivsi3_i4i
   and sdivsi3_i4i (both declared with FUNC) share the code and lookup
   tables in this section.  */
1620 #if defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
1621 /* This code uses shld, thus is not suitable for SH1 / SH2. */
1623 /* Signed / unsigned division without use of FPU, optimized for SH4.
1624 Uses a lookup table for divisors in the range -128 .. +128, and
1625 div1 with case distinction for larger divisors in three more ranges.
1626 The code is lumped together with the table to allow the use of mova. */
1627 #ifdef __LITTLE_ENDIAN__
/* Unsigned entry point.  */
1638 .global GLOBAL(udivsi3_i4i)
1639 FUNC(GLOBAL(udivsi3_i4i))
1640 GLOBAL(udivsi3_i4i):
/* r1 = 16-bit literal c128_w.  NOTE(review): presumably the value 128
   that bounds the table-driven divisor range -- confirm.  */
1641 mov.w LOCAL(c128_w), r1
1647 bf LOCAL(udiv_le128)
1649 bf LOCAL(udiv_ge64k)
/* The mova instructions below load the addresses of the lookup tables
   (div_table_ix, div_table_inv, div_table_clz) into r0.  */
1662 mova LOCAL(div_table_ix),r0
1663 bra LOCAL(div_le128_2)
1667 mova LOCAL(div_table_ix),r0
1671 mova LOCAL(div_table_inv),r0
1675 mova LOCAL(div_table_clz),r0
1678 bt/s LOCAL(div_by_1)
1689 LOCAL(div_by_1_neg):
1700 bra LOCAL(div_ge64k_2)
1712 mov.l LOCAL(zero_l),r1
1718 mov.w LOCAL(m256_w),r1
/* Byte store through r15 at offset L_LSWMSB.  NOTE(review): L_LSWMSB and
   L_MSWLSB are presumably byte offsets chosen by the __LITTLE_ENDIAN__
   conditional above -- confirm.  */
1720 mov.b r0,@(L_LSWMSB,r15)
1723 bra LOCAL(div_ge64k_end)
1745 rotcl r0; div1 r5,r1
1754 ENDFUNC(GLOBAL(udivsi3_i4i))
/* Signed entry point.  */
1756 .global GLOBAL(sdivsi3_i4i)
1757 FUNC(GLOBAL(sdivsi3_i4i))
1758 /* This is link-compatible with a GLOBAL(sdivsi3) call,
1759 but we effectively clobber only r1. */
1760 GLOBAL(sdivsi3_i4i):
1763 mov.w LOCAL(c128_w), r1
1764 bt/s LOCAL(pos_divisor)
1768 bt/s LOCAL(neg_result)
1777 bf/s LOCAL(div_ge64k)
1785 mov.l LOCAL(zero_l),r1
1792 mov.b r0,@(L_MSWLSB,r15)
1798 mov.b r0,@(L_LSWMSB,r15)
1799 LOCAL(div_ge64k_end):
1803 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
1811 LOCAL(div_le128_neg):
1813 mova LOCAL(div_table_ix),r0
1815 mova LOCAL(div_table_inv),r0
1816 bt/s LOCAL(div_by_1_neg)
1818 mova LOCAL(div_table_clz),r0
1833 bt/s LOCAL(pos_result)
1838 bf LOCAL(div_le128_neg)
1842 bf/s LOCAL(div_ge64k_neg)
1845 mov.l LOCAL(zero_l),r1
1852 mov.b r0,@(L_MSWLSB,r15)
1858 mov.b r0,@(L_LSWMSB,r15)
1859 LOCAL(div_ge64k_neg_end):
1863 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
1867 LOCAL(div_r8_neg_end):
1873 LOCAL(div_ge64k_neg):
1874 bt/s LOCAL(div_r8_neg)
1877 mov.l LOCAL(zero_l),r1
1883 mov.w LOCAL(m256_w),r1
1885 mov.b r0,@(L_LSWMSB,r15)
1888 bra LOCAL(div_ge64k_neg_end)
1901 rotcl r1; div1 r5,r0
1905 bra LOCAL(div_r8_neg_end)
1910 /* This table has been generated by divtab-sh4.c. */
1912 LOCAL(div_table_clz):
2041 /* Lookup table translating positive divisor to index into table of
2042 normalized inverse. N.B. the '0' entry is also the last entry of the
2043 previous table, and causes an unaligned access for division by zero. */
2044 LOCAL(div_table_ix):
2174 /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
2209 LOCAL(div_table_inv):
2242 /* maximum error: 0.987342 scaled: 0.921875*/
2244 ENDFUNC(GLOBAL(sdivsi3_i4i))
2245 #endif /* SH3 / SH4 */
2247 #endif /* L_div_table */
/* udiv_qrnnd_16: global entry point declared with HIDDEN_FUNC.  The
   register roles are listed in the comment below: outputs rn in r0 and
   qn in r1; inputs n1 in r0, n0 in r4, d in r5 and d1 in r6; r2 is the
   scratch value __m.  NOTE(review): by name a 16-bit-step quotient and
   remainder helper -- confirm against the body.  */
2249 #ifdef L_udiv_qrnnd_16
2250 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
2251 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
2252 /* n1 < d, but n1 might be larger than d1. */
2253 .global GLOBAL(udiv_qrnnd_16)
2255 GLOBAL(udiv_qrnnd_16):
2292 ENDFUNC(GLOBAL(udiv_qrnnd_16))
2293 #endif /* L_udiv_qrnnd_16 */