1 /* Copyright
(C
) 2004, 2006 Free Software Foundation
, Inc.
3 This file is free software
; you can redistribute it and/or modify it
4 under the terms of the GNU General
Public License as published by the
5 Free Software Foundation
; either version 2, or (at your option) any later version.
8 In addition to the permissions
in the GNU General
Public License
, the
9 Free Software Foundation gives you unlimited permission to link the
10 compiled version of
this file
into combinations with other programs
,
11 and to distribute those combinations without any restriction coming
12 from the use of
this file.
(The General
Public License restrictions
13 do apply
in other respects
; for example, they cover modification of
14 the file, and distribution when not linked into a combined executable.)
17 This file is distributed
in the hope that it will be useful
, but
18 WITHOUT ANY WARRANTY
; without even the implied warranty of
19 MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General
Public License for more details.
22 You should have received a copy of the GNU General
Public License
23 along with
this program
; see the file COPYING. If not, write to
24 the Free Software Foundation
, 51 Franklin Street
, Fifth Floor
,
25 Boston
, MA
02110-1301, USA.
*/
27 /* libgcc routines for the STMicroelectronics ST40-300 CPU.
28 Contributed by Joern Rennecke <joern.rennecke@st.com>. */
30 #include "lib1funcs.h
"
34 #if defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
35 /* This code uses shld and is therefore not suitable for SH1 / SH2. */
37 /* Signed / unsigned division without use of FPU, optimized for SH4-300.
38 Uses a lookup table for divisors in the range -128 .. +127, and
39 div1 with case distinction for larger divisors in three more ranges.
40 The code is lumped together with the table to allow the use of mova. */
41 #ifdef __LITTLE_ENDIAN__
/* GLOBAL(udivsi3_i4i) / GLOBAL(sdivsi3_i4i): 32-bit unsigned and signed
   integer division that does not use the FPU.  Both symbols are exported
   and share this single FUNC ... ENDFUNC region.

   Registers, as far as the comments on the visible lines show:
     r4 - dividend
     r5 - divisor
   NOTE(review): the register that receives the quotient is not stated on
   any line visible here -- confirm against the complete file.

   Small divisors are served from LOCAL(div_table_inv) and
   LOCAL(div_table_clz); larger divisors are handled with div1 steps on
   separate paths (LOCAL(div_ge32k), LOCAL(udiv_ge64k), LOCAL(div_ge8m)).
   The "N cycles up to here" remarks are the running cycle count of each
   path.

   NOTE(review): the numbers at the start of the lines are not contiguous,
   so this listing appears to omit instructions; do not reorder or restyle
   anything here without the complete source.  */
51 .global GLOBAL(udivsi3_i4i)
52 .global GLOBAL(sdivsi3_i4i)
53 FUNC(GLOBAL(udivsi3_i4i))
54 FUNC(GLOBAL(sdivsi3_i4i))
/* Signed path for the largest divisors; the label name and the 0x800000
   special case below suggest |divisor| >= 8M (TODO confirm).  */
57 LOCAL(div_ge8m): ! 10 cycles up to here
58 rotcr r1 ! signed shift must use original sign from r4
65 swap.w r5,r0 ! detect -0x80000000 : 0x800000
100 ! 31 cycles up to here
/* Unsigned path; the label name suggests divisor >= 64k (TODO confirm).  */
103 LOCAL(udiv_ge64k): ! 3 cycles up to here
111 ! 7 cycles up to here
115 extu.b r4,r1 ! 15 cycles up to here
122 .endr ! 25 cycles up to here
128 rotcl r0 ! 28 cycles up to here
131 LOCAL(udiv_r8): ! 6 cycles up to here
143 ! 12 cycles up to here
147 mov.l @r15+,r6 ! 24 cycles up to here
/* Signed path; the label name and the 0x8000 special case below suggest
   |divisor| >= 32k (TODO confirm).  */
152 LOCAL(div_ge32k): ! 6 cycles up to here
160 cmp/hi r1,r4 ! copy sign bit of r4 into T
161 rotcr r1 ! signed shift must use original sign from r4
166 mov r5,r7 ! detect r4 == 0x80000000 && r5 == 0x8000(00)
188 neg r7,r7 ! upper 16 bit of r7 == 0 if r4 == 0x80000000 && r5 == 0x8000
192 swap.w r7,r7 ! 26 cycles up to here.
208 shad r1,r5 ! 34 cycles up to here
225 extu.b r4,r0 ! 7 cycles up to here
228 .endr ! 15 cycles up to here
229 xor r1,r0 ! xor dividend with result lsb
233 mov.l r7,@-r15 ! 21 cycles up to here
239 xor r7,r1 ! replace lsb of result with lsb of dividend
246 div1 r6,r1 ! 28 cycles up to here
248 /* This is link-compatible with a GLOBAL(sdivsi3) call,
249 but we effectively clobber only r1, macl and mach */
250 /* Because negative quotients are calculated as one's complements,
251 -0x80000000 divided by the smallest positive number of a number
252 range (0x80, 0x8000, 0x800000) causes saturation in the one's
253 complement representation, and we have to suppress the
254 one's -> two's complement adjustment. Since positive numbers
255 don't get such an adjustment, it's OK to also compute one's -> two's
256 complement adjustment suppression for a dividend of 0. */
/* Dispatch to the table-driven path and to the >= 32k path.  bt/s and
   bf/s are the delay-slot branch forms: the instruction that follows each
   one is executed before the branch takes effect.  */
263 bt/s LOCAL(div_le128)
269 bf/s LOCAL(div_ge32k)
270 cmp/hi r1,r4 ! copy sign bit of r4 into T
272 shll16 r6 ! 7 cycles up to here
278 mov r4,r0 ! re-compute adjusted dividend
284 add r4,r0 ! adjusted dividend
288 swap.w r4,r8 ! detect special case r4 = 0x80000000, r5 = 0x80
291 xor r1,r0 ! xor dividend with result lsb
296 add #-0x80,r8 ! r8 is 0 iff there is a match
298 swap.w r8,r7 ! or upper 16 bits...
300 or r7,r8 !...into lower 16 bits
308 xor r7,r1 ! replace lsb of result with lsb of dividend
310 neg r8,r8 ! upper 16 bits of r8 are now 0xffff iff we want end adjm.
316 mov.l @r15+,r8 ! 58 insns, 29 cycles up to here
/* Table-driven path reached from the unsigned entry (going by the label
   name -- TODO confirm).  Each mova places the address of one table in r0.  */
334 LOCAL(udiv_le128): ! 4 cycles up to here (or 7 for mispredict)
335 mova LOCAL(div_table_inv),r0
338 mova LOCAL(div_table_clz),r0
/* Table-driven path reached from the signed dispatch above.  Each mova
   places the address of one table in r0.  */
359 LOCAL(div_le128): ! 3 cycles up to here (or 6 for mispredict)
360 mova LOCAL(div_table_inv),r0
363 mova LOCAL(div_table_clz),r0
373 bt/s LOCAL(le128_neg)
383 /* Could trap divide by zero for the cost of one cycle more mispredict penalty:
387 bt/s LOCAL(le128_neg)
389 bt LOCAL(div_by_zero)
398 bt LOCAL(div_by_zero)
413 ENDFUNC(GLOBAL(udivsi3_i4i))
414 ENDFUNC(GLOBAL(sdivsi3_i4i))
416 /* This table has been generated by divtab-sh4.c. */
546 LOCAL(div_table_clz):
675 /* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32,
676 or in bit 33 for powers of two. */
806 LOCAL(div_table_inv):
935 /* maximum error: 0.987342 scaled: 0.921875*/
937 #endif /* SH3 / SH4 */
939 #endif /* L_div_table */
940 #endif /* !__SHMEDIA__ */