1 /* Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it
4 under the terms of the GNU General Public License as published by the
5 Free Software Foundation; either version 3, or (at your option) any
later version.
8 This file is distributed in the hope that it will be useful, but
9 WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 General Public License for more details.
13 Under Section 7 of GPL version 3, you are granted additional
14 permissions described in the GCC Runtime Library Exception, version
15 3.1, as published by the Free Software Foundation.
17 You should have received a copy of the GNU General Public License and
18 a copy of the GCC Runtime Library Exception along with this program;
19 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
20 <http://www.gnu.org/licenses/>. */
22 /* Moderately space-optimized libgcc routines for the Renesas SH /
23 STMicroelectronics ST40 CPUs.
24 Contributed by J"orn Rennecke joern.rennecke@st.com. */
26 #include "lib1funcs.h"
30 /* 88 bytes; sh4-200 cycle counts:
31 divisor >= 2G: 11 cycles
32 dividend < 2G: 48 cycles
33 dividend >= 2G, divisor != 1: 54 cycles
34 dividend >= 2G, divisor == 1: 22 cycles */
35 #if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
/* udivsi3_i4i, variant assembled when __SH_FPU_DOUBLE__ or
   __SH4_SINGLE_ONLY__ is defined.
   NOTE(review): the embedded line numbers jump (39 -> 46 -> 54 ...), so
   most instructions of this routine are absent from this excerpt.  The
   remarks below describe only the lines that are shown.
   NOTE(review): which of r4 / r5 holds the dividend is not stated in the
   visible lines - confirm against the complete routine.  */
36 !! args in r4 and r5, result in r0, clobber r1
38 .global GLOBAL(udivsi3_i4i)
39 FUNC(GLOBAL(udivsi3_i4i))
/* bf branches when the T bit is clear.  The instruction that sets T and
   the huge_divisor label itself are not among the visible lines.  */
46 bf LOCAL(huge_divisor)
/* bt branches when the T bit is set, jumping over the lines between here
   and dividend_adjusted (not shown).  */
54 bt LOCAL(dividend_adjusted)
60 LOCAL(dividend_adjusted):
68 #else /* !FMOVD_WORKS */
/* Alternative path used when FMOVD_WORKS is not defined; the opening
   conditional is outside the visible lines.  This path defines its own
   dividend_adjusted label, presumably mutually exclusive with the one
   before the #else - confirm that the opener precedes that first label.  */
/* bt/s is the delayed form of bt: the following instruction (not shown)
   occupies the delay slot and executes whether or not the branch is
   taken.  */
74 bt/s LOCAL(dividend_adjusted)
81 LOCAL(dividend_adjusted):
91 #endif /* !FMOVD_WORKS */
/* The 8-byte aligned data announced by the next line is not part of the
   visible lines.  */
98 .p2align 3 ! make double below 8 byte aligned.
116 ENDFUNC(GLOBAL(udivsi3_i4i))
117 #elif !defined (__sh1__) /* !__SH_FPU_DOUBLE__ */
120 /* With 36 bytes, the following would probably be the most compact
121 implementation, but with 139 cycles on an sh4-200, it is extremely slow. */
146 /* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
148 udiv small divisor: 55 cycles
149 udiv large divisor: 52 cycles
150 sdiv small divisor, positive result: 59 cycles
151 sdiv large divisor, positive result: 56 cycles
152 sdiv small divisor, negative result: 65 cycles (*)
153 sdiv large divisor, negative result: 62 cycles (*)
154 (*): r2 is restored in the rts delay slot and has a lingering latency
155 of two more cycles. */
/* Joint implementation of udivsi3_i4i and sdivsi3_i4i.  Both FUNC
   scopes are opened together below and closed separately, and the
   sdivsi3_i4i code further down branches back into labels that sit
   inside the udivsi3_i4i part (sdiv_small_divisor, sdiv_large_divisor).
   NOTE(review): the embedded line numbers jump, so most instructions are
   absent from this excerpt; the entry labels of both routines and the
   labels pos_divisor, neg_result and pos_result are not visible.  */
157 .global GLOBAL(udivsi3_i4i)
158 FUNC(GLOBAL(udivsi3_i4i))
159 FUNC(GLOBAL(sdivsi3_i4i))
/* bf/s: delayed branch taken when the T bit is clear; the instruction
   after it (not shown) runs in the delay slot.  The test that sets T is
   not visible - presumably a divisor magnitude check, going by the label
   name.  */
167 bf/s LOCAL(large_divisor)
/* Shared entry point: the signed code reaches this label through the bra
   near the end of this block.  */
171 LOCAL(sdiv_small_divisor):
/* div1 is the SH single-step division instruction; ";" separates several
   statements on one line.  The final div1 follows rts and therefore
   executes in the rts delay slot, before control returns to the caller.  */
194 div1 r5,r4; div1 r5,r4; div1 r5,r4
195 div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
206 LOCAL(large_divisor):
/* Shared entry point for the signed code (target of the bf/s after
   sdiv_check_divisor).  One source line between the two labels is not
   shown.  */
208 LOCAL(sdiv_large_divisor):
219 ENDFUNC(GLOBAL(udivsi3_i4i))
/* From here on: code belonging to sdivsi3_i4i only.  */
221 .global GLOBAL(sdivsi3_i4i)
/* The bt/s instructions below are delayed branches taken when T is set.
   The tests feeding them are not visible; judging by the label names they
   dispatch on the signs of the operands - TODO confirm.  */
226 bt/s LOCAL(pos_divisor)
230 bt/s LOCAL(neg_result)
/* bra is an unconditional delayed branch.  */
235 bra LOCAL(sdiv_check_divisor)
239 bt/s LOCAL(pos_result)
/* mova places the address of negate_result in r0.  How r0 is consumed is
   not visible here; presumably the division code later returns through
   that address so the result gets negated - TODO confirm.  */
243 mova LOCAL(negate_result),r0
/* Select one of the two shared division paths in the udivsi3_i4i part:
   sdiv_large_divisor when T is clear, otherwise sdiv_small_divisor.  */
249 LOCAL(sdiv_check_divisor):
251 bf/s LOCAL(sdiv_large_divisor)
253 bra LOCAL(sdiv_small_divisor)
/* The instructions under this label are not part of the visible lines.  */
256 LOCAL(negate_result):
260 ENDFUNC(GLOBAL(sdivsi3_i4i))
261 #endif /* !__SH_FPU_DOUBLE__ */
262 #endif /* L_udivsi3_i4i */
265 #if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
/* sdivsi3_i4i, variant assembled when __SH_FPU_DOUBLE__ or
   __SH4_SINGLE_ONLY__ is defined.
   NOTE(review): the embedded line numbers jump from 270 to 318, so the
   whole instruction sequence of this routine is absent from this excerpt;
   only its size / timing note, register contract and symbol frame are
   visible.  */
266 /* 48 bytes, 45 cycles on sh4-200 */
267 !! args in r4 and r5, result in r0, clobber r1
269 .global GLOBAL(sdivsi3_i4i)
270 FUNC(GLOBAL(sdivsi3_i4i))
318 ENDFUNC(GLOBAL(sdivsi3_i4i))
319 #endif /* __SH_FPU_DOUBLE__ */
320 #endif /* L_sdivsi3_i4i */