1 /* Copyright
(C
) 2006, 2009 Free Software Foundation
, Inc.
3 This file is free software
; you can redistribute it and/or modify it
4 under the terms of the GNU General
Public License as published by the
5 Free Software Foundation
; either version 3, or (at your option) any
later version.
8 This file is distributed
in the hope that it will be useful
, but
9 WITHOUT ANY WARRANTY
; without even the implied warranty of
10 MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 General
Public License for more details.
13 Under
Section 7 of GPL version
3, you are granted additional
14 permissions described
in the GCC Runtime Library Exception
, version
15 3.1, as published by the Free Software Foundation.
17 You should have received a copy of the GNU General
Public License
and
18 a copy of the GCC Runtime Library Exception along with
this program
;
19 see the files COPYING3
and COPYING.RUNTIME respectively. If
not, see
20 <http://www.gnu.
org/licenses
/>.
*/
22 /* Moderately Space
-optimized libgcc routines for the Renesas SH
/
23 STMicroelectronics ST40 CPUs.
24 Contributed by Jörn Rennecke <joern.rennecke@st.com>. */
26 #include "lib1funcs.h
"
31 /* 88 bytes; sh4-200 cycle counts:
32 divisor >= 2G: 11 cycles
33 dividend < 2G: 48 cycles
34 dividend >= 2G, divisor != 1: 54 cycles
35 dividend >= 2G, divisor == 1: 22 cycles */
36 #if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
37 !! args in r4 and r5, result in r0, clobber r1
! udivsi3_i4i, variant assembled when __SH_FPU_DOUBLE__ or
! __SH4_SINGLE_ONLY__ is defined (see the enclosing #if).
! NOTE(review): only the symbol directives, labels and branches of this
! routine are visible here; the instructions that set the T bit before
! each branch and the arithmetic itself are not shown, so the comments
! below describe control flow only.
39 .global GLOBAL(udivsi3_i4i)
40 FUNC(GLOBAL(udivsi3_i4i))
! bf branches when T is clear.  The target label is not defined in the
! visible lines; presumably it handles the "divisor >= 2G" case listed
! in the cycle table above -- TODO confirm.
47 bf LOCAL(huge_divisor)
! bt branches when T is set, skipping whatever adjustment sits between
! this branch and the label (not visible here).
55 bt LOCAL(dividend_adjusted)
61 LOCAL(dividend_adjusted):
! Alternative sequence for the case where FMOVD_WORKS is not defined.
! It defines the same dividend_adjusted label as the sequence above, so
! only one of the two copies is expected to be assembled; the opening
! conditional is not visible here -- TODO confirm.
69 #else /* !FMOVD_WORKS */
! bt/s is the delayed form of bt: the instruction that follows it (not
! visible here) executes in the delay slot.
75 bt/s LOCAL(dividend_adjusted)
82 LOCAL(dividend_adjusted):
92 #endif /* !FMOVD_WORKS */
! The data this alignment is for is not visible in this excerpt.
99 .p2align 3 ! make double below 8 byte aligned.
117 ENDFUNC(GLOBAL(udivsi3_i4i))
118 #elif !defined (__sh1__) /* !__SH_FPU_DOUBLE__ */
121 /* With 36 bytes, the following would probably be the most compact
122 implementation, but with 139 cycles on an sh4-200, it is extremely slow. */
147 /* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
149 udiv small divisor: 55 cycles
150 udiv large divisor: 52 cycles
151 sdiv small divisor, positive result: 59 cycles
152 sdiv large divisor, positive result: 56 cycles
153 sdiv small divisor, negative result: 65 cycles (*)
154 sdiv large divisor, negative result: 62 cycles (*)
155 (*): r2 is restored in the rts delay slot and has a lingering latency
156 of two more cycles. */
! Variant of udivsi3_i4i and sdivsi3_i4i assembled when the FPU
! condition above is false and __sh1__ is not defined (see the enclosing
! #elif).  The two routines share code: the signed entry branches into
! the sdiv_small_divisor / sdiv_large_divisor labels that sit inside the
! unsigned routine.
! NOTE(review): most instructions are missing from this excerpt; the
! comments below only describe the labels and branches that are visible.
158 .global GLOBAL(udivsi3_i4i)
159 FUNC(GLOBAL(udivsi3_i4i))
! FUNC is applied to sdivsi3_i4i here as well, although its .global
! directive only appears further down.
160 FUNC(GLOBAL(sdivsi3_i4i))
! bf/s: delayed branch taken when T is clear; the delay-slot instruction
! is not visible here.
168 bf/s LOCAL(large_divisor)
! Small-divisor path; also reached by a bra from the signed code below.
172 LOCAL(sdiv_small_divisor):
! A run of div1 steps with r5 and r4 as operands.  The last div1 is
! written after rts, so it executes in the rts delay slot.
195 div1 r5,r4; div1 r5,r4; div1 r5,r4
196 div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
! Large-divisor path.  large_divisor is the target of the bf/s above and
! sdiv_large_divisor is the target of the bf/s in the signed code below;
! no instruction is visible between the two labels.
207 LOCAL(large_divisor):
209 LOCAL(sdiv_large_divisor):
220 ENDFUNC(GLOBAL(udivsi3_i4i))
! Signed division entry.  The visible branches suggest sign handling for
! divisor and result (pos_divisor, neg_result, pos_result), but those
! labels and the tests feeding the branches are not visible here --
! TODO confirm against the full source.
222 .global GLOBAL(sdivsi3_i4i)
227 bt/s LOCAL(pos_divisor)
231 bt/s LOCAL(neg_result)
236 bra LOCAL(sdiv_check_divisor)
240 bt/s LOCAL(pos_result)
! mova puts the address of negate_result into r0; how that address is
! used afterwards is not visible here.
244 mova LOCAL(negate_result),r0
! Dispatch into the shared division code inside udivsi3_i4i.
250 LOCAL(sdiv_check_divisor):
252 bf/s LOCAL(sdiv_large_divisor)
254 bra LOCAL(sdiv_small_divisor)
! Body not visible; per the size/cycle comment above, r2 is restored in
! an rts delay slot on the negative-result paths.
257 LOCAL(negate_result):
261 ENDFUNC(GLOBAL(sdivsi3_i4i))
262 #endif /* !__SH_FPU_DOUBLE__ */
263 #endif /* L_udivsi3_i4i */
266 #if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
267 /* 48 bytes, 45 cycles on sh4-200 */
268 !! args in r4 and r5, result in r0, clobber r1
! sdivsi3_i4i, variant assembled when __SH_FPU_DOUBLE__ or
! __SH4_SINGLE_ONLY__ is defined (see the enclosing #if).
! NOTE(review): the instructions between FUNC and ENDFUNC are not visible
! in this excerpt; only the symbol export and function bracket remain.
270 .global GLOBAL(sdivsi3_i4i)
271 FUNC(GLOBAL(sdivsi3_i4i))
319 ENDFUNC(GLOBAL(sdivsi3_i4i))
320 #endif /* __SH_FPU_DOUBLE__ */
321 #endif /* L_sdivsi3_i4i */
322 #endif /* !__SHMEDIA__ */