1 /* Copyright (C) 2004, 2006, 2009 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it
4 under the terms of the GNU General Public License as published by the
5 Free Software Foundation; either version 3, or (at your option) any later version.
8 This file is distributed in the hope that it will be useful, but
9 WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 General Public License for more details.
13 Under Section 7 of GPL version 3, you are granted additional
14 permissions described in the GCC Runtime Library Exception, version
15 3.1, as published by the Free Software Foundation.
17 You should have received a copy of the GNU General Public License and
18 a copy of the GCC Runtime Library Exception along with this program;
19 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
20 <http://www.gnu.org/licenses/>. */
23 /* libgcc routines for the STMicroelectronics ST40-300 CPU.
24 Contributed by J"orn Rennecke joern.rennecke@st.com. */
/* Presumably supplies the GLOBAL, LOCAL, FUNC and ENDFUNC macros used below -- confirm against lib1funcs.h. */
26 #include "lib1funcs.h"
30 #if defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
31 /* This code uses shld, thus is not suitable for SH1 / SH2. */
33 /* Signed / unsigned division without use of FPU, optimized for SH4-300.
34 Uses a lookup table for divisors in the range -128 .. +127, and
35 div1 with case distinction for larger divisors in three more ranges.
36 The code is lumped together with the table to allow the use of mova. */
37 #ifdef __LITTLE_ENDIAN__
47 .global GLOBAL(udivsi3_i4i)
48 .global GLOBAL(sdivsi3_i4i)
49 FUNC(GLOBAL(udivsi3_i4i))
50 FUNC(GLOBAL(sdivsi3_i4i))
53 LOCAL(div_ge8m): ! 10 cycles up to here
54 rotcr r1 ! signed shift must use original sign from r4
61 swap.w r5,r0 ! detect -0x80000000 : 0x800000
96 ! 31 cycles up to here
99 LOCAL(udiv_ge64k): ! 3 cycles up to here
107 ! 7 cycles up to here
111 extu.b r4,r1 ! 15 cycles up to here
118 .endr ! 25 cycles up to here
124 rotcl r0 ! 28 cycles up to here
127 LOCAL(udiv_r8): ! 6 cycles up to here
139 ! 12 cycles up to here
143 mov.l @r15+,r6 ! 24 cycles up to here
148 LOCAL(div_ge32k): ! 6 cycles up to here
156 cmp/hi r1,r4 ! copy sign bit of r4 into T
157 rotcr r1 ! signed shift must use original sign from r4
162 mov r5,r7 ! detect r4 == 0x80000000 && r5 == 0x8000(00)
184 neg r7,r7 ! upper 16 bit of r7 == 0 if r4 == 0x80000000 && r5 == 0x8000
188 swap.w r7,r7 ! 26 cycles up to here.
204 shad r1,r5 ! 34 cycles up to here
221 extu.b r4,r0 ! 7 cycles up to here
224 .endr ! 15 cycles up to here
225 xor r1,r0 ! xor dividend with result lsb
229 mov.l r7,@-r15 ! 21 cycles up to here
235 xor r7,r1 ! replace lsb of result with lsb of dividend
242 div1 r6,r1 ! 28 cycles up to here
244 /* This is link-compatible with a GLOBAL(sdivsi3) call,
245 but we effectively clobber only r1, macl and mach */
246 /* Because negative quotients are calculated as one's complements,
247 -0x80000000 divided by the smallest positive divisor of a divisor
248 range (0x80, 0x8000, 0x800000) causes saturation in the one's
249 complement representation, and we have to suppress the
250 one's -> two's complement adjustment. Since positive numbers
251 don't get such an adjustment, it's OK to also compute one's -> two's
252 complement adjustment suppression for a dividend of 0. */
259 bt/s LOCAL(div_le128)
265 bf/s LOCAL(div_ge32k)
266 cmp/hi r1,r4 ! copy sign bit of r4 into T
268 shll16 r6 ! 7 cycles up to here
274 mov r4,r0 ! re-compute adjusted dividend
280 add r4,r0 ! adjusted dividend
284 swap.w r4,r8 ! detect special case r4 = 0x80000000, r5 = 0x80
287 xor r1,r0 ! xor dividend with result lsb
292 add #-0x80,r8 ! r8 is 0 iff there is a match
294 swap.w r8,r7 ! or upper 16 bits...
296 or r7,r8 !...into lower 16 bits
304 xor r7,r1 ! replace lsb of result with lsb of dividend
306 neg r8,r8 ! upper 16 bits of r8 are now 0xffff iff we want end adjm.
312 mov.l @r15+,r8 ! 58 insns, 29 cycles up to here
330 LOCAL(udiv_le128): ! 4 cycles up to here (or 7 for mispredict)
331 mova LOCAL(div_table_inv),r0
334 mova LOCAL(div_table_clz),r0
355 LOCAL(div_le128): ! 3 cycles up to here (or 6 for mispredict)
356 mova LOCAL(div_table_inv),r0
359 mova LOCAL(div_table_clz),r0
369 bt/s LOCAL(le128_neg)
379 /* Could trap divide by zero for the cost of one cycle more mispredict penalty:
383 bt/s LOCAL(le128_neg)
385 bt LOCAL(div_by_zero)
394 bt LOCAL(div_by_zero)
409 ENDFUNC(GLOBAL(udivsi3_i4i))
410 ENDFUNC(GLOBAL(sdivsi3_i4i))
412 /* This table has been generated by divtab-sh4.c. */
542 LOCAL(div_table_clz):
671 /* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32,
672 or in bit 33 for powers of two. */
802 LOCAL(div_table_inv):
931 /* maximum error: 0.987342 scaled: 0.921875*/
933 #endif /* SH3 / SH4 */
935 #endif /* L_div_table */
936 #endif /* !__SHMEDIA__ */