/* Copyright (C) 2006 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
/* Moderately Space-optimized libgcc routines for the Renesas SH /
   STMicroelectronics ST40 CPUs.
   Contributed by J"orn Rennecke joern.rennecke@st.com.  */
31 #include "lib1funcs.h
"
/* 88 bytes; sh4-200 cycle counts:
   divisor  >= 2G: 11 cycles
   dividend <  2G: 48 cycles
   dividend >= 2G, divisor != 1: 54 cycles
   dividend >= 2G, divisor == 1: 22 cycles */
41 #if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
!! args in r4 and r5, result in r0, clobbers r1
44 .global GLOBAL(udivsi3_i4i)
45 FUNC(GLOBAL(udivsi3_i4i))
52 bf LOCAL(huge_divisor)
60 bt LOCAL(dividend_adjusted)
66 LOCAL(dividend_adjusted):
74 #else /* !FMOVD_WORKS */
80 bt/s LOCAL(dividend_adjusted)
87 LOCAL(dividend_adjusted):
97 #endif /* !FMOVD_WORKS */
104 .p2align 3 ! make double below 8 byte aligned.
122 ENDFUNC(GLOBAL(udivsi3_i4i))
123 #elif !defined (__sh1__) /* !__SH_FPU_DOUBLE__ */
/* With 36 bytes, the following would probably be the most compact
   implementation, but with 139 cycles on an sh4-200, it is extremely slow.  */
/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i.
   Cycle counts:
   udiv small divisor: 55 cycles
   udiv large divisor: 52 cycles
   sdiv small divisor, positive result: 59 cycles
   sdiv large divisor, positive result: 56 cycles
   sdiv small divisor, negative result: 65 cycles (*)
   sdiv large divisor, negative result: 62 cycles (*)
   (*): r2 is restored in the rts delay slot and has a lingering latency
        of two more cycles.  */
163 .global GLOBAL(udivsi3_i4i)
164 FUNC(GLOBAL(udivsi3_i4i))
165 FUNC(GLOBAL(sdivsi3_i4i))
173 bf/s LOCAL(large_divisor)
177 LOCAL(sdiv_small_divisor):
200 div1 r5,r4; div1 r5,r4; div1 r5,r4
201 div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
212 LOCAL(large_divisor):
214 LOCAL(sdiv_large_divisor):
225 ENDFUNC(GLOBAL(udivsi3_i4i))
227 .global GLOBAL(sdivsi3_i4i)
232 bt/s LOCAL(pos_divisor)
236 bt/s LOCAL(neg_result)
241 bra LOCAL(sdiv_check_divisor)
245 bt/s LOCAL(pos_result)
249 mova LOCAL(negate_result),r0
255 LOCAL(sdiv_check_divisor):
257 bf/s LOCAL(sdiv_large_divisor)
259 bra LOCAL(sdiv_small_divisor)
262 LOCAL(negate_result):
266 ENDFUNC(GLOBAL(sdivsi3_i4i))
267 #endif /* !__SH_FPU_DOUBLE__ */
268 #endif /* L_udivsi3_i4i */
271 #if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
/* 48 bytes, 45 cycles on sh4-200 */
!! args in r4 and r5, result in r0, clobbers r1
275 .global GLOBAL(sdivsi3_i4i)
276 FUNC(GLOBAL(sdivsi3_i4i))
324 ENDFUNC(GLOBAL(sdivsi3_i4i))
325 #endif /* __SH_FPU_DOUBLE__ */
326 #endif /* L_sdivsi3_i4i */
327 #endif /* !__SHMEDIA__ */