1 /* Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it
4 under the terms of the GNU General Public License as published by the
5 Free Software Foundation; either version 3, or (at your option) any later version.
8 This file is distributed in the hope that it will be useful, but
9 WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 General Public License for more details.
13 Under Section 7 of GPL version 3, you are granted additional
14 permissions described in the GCC Runtime Library Exception, version
15 3.1, as published by the Free Software Foundation.
17 You should have received a copy of the GNU General Public License and
18 a copy of the GCC Runtime Library Exception along with this program;
19 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
20 <http://www.gnu.org/licenses/>. */
22 /* Moderately space-optimized libgcc routines for the Renesas SH /
23 STMicroelectronics ST40 CPUs.
24 Contributed by Joern Rennecke <joern.rennecke@st.com>. */
26 #include "lib1funcs.h"
31 /* 88 bytes; sh4-200 cycle counts:
32 divisor >= 2G: 11 cycles
33 dividend < 2G: 48 cycles
34 dividend >= 2G, divisor != 1: 54 cycles
35 dividend >= 2G, divisor == 1: 22 cycles */
36 #if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
37 !! args in r4 and r5, result in r0, clobber r1
/* udivsi3_i4i, variant assembled when __SH_FPU_DOUBLE__ or
   __SH4_SINGLE_ONLY__ is defined.  Per the notes just before this block:
   args in r4 and r5, result in r0, clobbers r1.
   NOTE(review): only the directives, labels and branches of this routine
   are visible in this listing; the comments below therefore describe
   control-flow structure only.  */
39 .global GLOBAL(udivsi3_i4i)
40 FUNC(GLOBAL(udivsi3_i4i))
/* bf branches when the T bit is clear.  Presumably this is the
   "divisor >= 2G" case from the cycle table above -- confirm against the
   compare that sets T, which is not visible here.  */
47 bf LOCAL(huge_divisor)
/* Two alternative bodies follow, separated by the #else below; the
   matching #if is not visible here, but the #else/#endif comments name
   FMOVD_WORKS as the condition.  Each alternative defines its own
   dividend_adjusted label and branches to it when T is set.  */
55 bt LOCAL(dividend_adjusted)
61 LOCAL(dividend_adjusted):
69 #else /* !FMOVD_WORKS */
/* Same skip as in the first alternative, but in the delayed-branch form:
   the instruction following bt/s executes in its delay slot.  */
75 bt/s LOCAL(dividend_adjusted)
82 LOCAL(dividend_adjusted):
92 #endif /* !FMOVD_WORKS */
/* Alignment for constant data that follows (data itself not visible).  */
99 .p2align 3 ! make double below 8 byte aligned.
117 ENDFUNC(GLOBAL(udivsi3_i4i))
118 #elif !defined (__sh1__) /* !__SH_FPU_DOUBLE__ */
121 /* With 36 bytes, the following would probably be the most compact
122 implementation, but with 139 cycles on an sh4-200, it is extremely slow. */
147 /* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
149 udiv small divisor: 55 cycles
150 udiv large divisor: 52 cycles
151 sdiv small divisor, positive result: 59 cycles
152 sdiv large divisor, positive result: 56 cycles
153 sdiv small divisor, negative result: 65 cycles (*)
154 sdiv large divisor, negative result: 62 cycles (*)
155 (*): r2 is restored in the rts delay slot and has a lingering latency
156 of two more cycles. */
/* Integer-only udivsi3_i4i, assembled when the FPU variant is not selected
   and __sh1__ is not defined.  FUNC is opened for sdivsi3_i4i here as well;
   presumably because the signed routine branches into the
   sdiv_small_divisor / sdiv_large_divisor labels defined in this body.
   NOTE(review): most instructions of this routine are not visible in this
   listing; comments describe the visible control flow only.  */
158 .global GLOBAL(udivsi3_i4i)
159 FUNC(GLOBAL(udivsi3_i4i))
160 FUNC(GLOBAL(sdivsi3_i4i))
/* Delayed branch to large_divisor when T is clear (the next instruction
   executes in the delay slot); otherwise fall through towards the
   small-divisor code.  */
168 bf/s LOCAL(large_divisor)
/* Also the target of a bra from the sdiv_check_divisor code in
   sdivsi3_i4i.  */
172 LOCAL(sdiv_small_divisor):
/* Unrolled single-step divide instructions (div1 performs one division
   step of r4 by r5).  The final div1 follows rts and so executes in the
   rts delay slot, before control returns to the caller.  */
195 div1 r5,r4; div1 r5,r4; div1 r5,r4
196 div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
/* large_divisor is reached from the bf/s above; sdiv_large_divisor is the
   target of the bf/s in the sdiv_check_divisor code of sdivsi3_i4i.  */
207 LOCAL(large_divisor):
209 LOCAL(sdiv_large_divisor):
220 ENDFUNC(GLOBAL(udivsi3_i4i))
/* Integer-only sdivsi3_i4i.  Its FUNC directive is issued earlier, next to
   the one for udivsi3_i4i, and it shares that routine's division code.
   NOTE(review): the labels pos_divisor, neg_result and pos_result are
   referenced below but their definitions are not visible in this listing;
   presumably the sign of each operand is tested and the operands are made
   non-negative before dispatching -- confirm against the full source.  */
222 .global GLOBAL(sdivsi3_i4i)
/* bt/s is a delayed branch taken when T is set; the following instruction
   executes in the delay slot either way.  */
227 bt/s LOCAL(pos_divisor)
231 bt/s LOCAL(neg_result)
/* Unconditional (delayed) branch to the common divisor-size dispatch.  */
236 bra LOCAL(sdiv_check_divisor)
240 bt/s LOCAL(pos_result)
/* mova places the address of negate_result in r0.  Presumably that address
   is then installed as the return address so that the shared division code
   returns into negate_result -- confirm; the consuming instruction is not
   visible here.  */
244 mova LOCAL(negate_result),r0
/* Dispatch into the division code shared with udivsi3_i4i: T clear selects
   sdiv_large_divisor, otherwise branch to sdiv_small_divisor.  */
250 LOCAL(sdiv_check_divisor):
252 bf/s LOCAL(sdiv_large_divisor)
254 bra LOCAL(sdiv_small_divisor)
/* Fix-up code for a negative quotient (instructions not visible here).  */
257 LOCAL(negate_result):
261 ENDFUNC(GLOBAL(sdivsi3_i4i))
262 #endif /* !__SH_FPU_DOUBLE__ */
263 #endif /* L_udivsi3_i4i */
266 #if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
267 /* 48 bytes, 45 cycles on sh4-200 */
268 !! args in r4 and r5, result in r0, clobber r1
/* sdivsi3_i4i, variant assembled when __SH_FPU_DOUBLE__ or
   __SH4_SINGLE_ONLY__ is defined.
   NOTE(review): only the symbol declaration and the FUNC/ENDFUNC bracket
   are visible in this listing; the routine's instructions are not shown.  */
270 .global GLOBAL(sdivsi3_i4i)
271 FUNC(GLOBAL(sdivsi3_i4i))
319 ENDFUNC(GLOBAL(sdivsi3_i4i))
320 #endif /* __SH_FPU_DOUBLE__ */
321 #endif /* L_sdivsi3_i4i */
322 #endif /* !__SHMEDIA__ */