1 /* Copyright (C) 2004-2013 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it
4 under the terms of the GNU General Public License as published by the
5 Free Software Foundation; either version 3, or (at your option) any later version.
8 This file is distributed in the hope that it will be useful, but
9 WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 General Public License for more details.
13 Under Section 7 of GPL version 3, you are granted additional
14 permissions described in the GCC Runtime Library Exception, version
15 3.1, as published by the Free Software Foundation.
17 You should have received a copy of the GNU General Public License and
18 a copy of the GCC Runtime Library Exception along with this program;
19 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
20 <http://www.gnu.org/licenses/>. */
23 /* libgcc routines for the STMicroelectronics ST40-300 CPU.
24 Contributed by Joern Rennecke <joern.rennecke@st.com>. */
26 #include "lib1funcs.h"
30 #if defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
31 /* This code uses shld, and is thus not suitable for SH1 / SH2. */
33 /* Signed / unsigned division without use of FPU, optimized for SH4-300.
34 Uses a lookup table for divisors in the range -128 .. +127, and
35 div1 with case distinction for larger divisors in three more ranges.
36 The code is lumped together with the table to allow the use of mova. */
37 #ifdef __LITTLE_ENDIAN__
/* Entry points udivsi3_i4i and sdivsi3_i4i: unsigned and signed integer
   division without use of the FPU.  As the annotations in this routine
   show, r4 holds the dividend and r5 the divisor.  Small divisors are
   handled through the div_table_inv / div_table_clz lookup tables (the
   *_le128 paths); larger divisors go through the div1-based paths.  */
47 .global GLOBAL(udivsi3_i4i)
48 .global GLOBAL(sdivsi3_i4i)
49 FUNC(GLOBAL(udivsi3_i4i))
50 FUNC(GLOBAL(sdivsi3_i4i))
/* Presumably the signed path for divisors of magnitude >= 0x800000 (8M),
   judging by the label name and the 0x800000 special case detected
   below -- confirm.  */
53 LOCAL(div_ge8m): ! 10 cycles up to here
54 rotcr r1 ! signed shift must use original sign from r4
61 swap.w r5,r0 ! detect -0x80000000 : 0x800000
96 ! 31 cycles up to here
/* Presumably the unsigned path for divisors >= 0x10000 (64k), judging by
   the label name -- confirm.  */
99 LOCAL(udiv_ge64k): ! 3 cycles up to here
107 ! 7 cycles up to here
111 extu.b r4,r1 ! 15 cycles up to here
118 .endr ! 25 cycles up to here
124 rotcl r0 ! 28 cycles up to here
127 LOCAL(udiv_r8): ! 6 cycles up to here
139 ! 12 cycles up to here
/* Reload r6 from the stack (post-increment load through r15).  */
143 mov.l @r15+,r6 ! 24 cycles up to here
/* Presumably the signed path for divisors of magnitude >= 0x8000 (32k),
   judging by the label name and the 0x8000 special case detected
   below -- confirm.  */
148 LOCAL(div_ge32k): ! 6 cycles up to here
156 cmp/hi r1,r4 ! copy sign bit of r4 into T
157 rotcr r1 ! signed shift must use original sign from r4
162 mov r5,r7 ! detect r4 == 0x80000000 && r5 == 0x8000(00)
184 neg r7,r7 ! upper 16 bit of r7 == 0 if r4 == 0x80000000 && r5 == 0x8000
188 swap.w r7,r7 ! 26 cycles up to here.
204 shad r1,r5 ! 34 cycles up to here
221 extu.b r4,r0 ! 7 cycles up to here
224 .endr ! 15 cycles up to here
225 xor r1,r0 ! xor dividend with result lsb
/* Save r7 on the stack (pre-decrement store through r15).  */
229 mov.l r7,@-r15 ! 21 cycles up to here
235 xor r7,r1 ! replace lsb of result with lsb of dividend
242 div1 r6,r1 ! 28 cycles up to here
244 /* This is link-compatible with a GLOBAL(sdivsi3) call,
245 but we effectively clobber only r1, macl and mach. */
246 /* Because negative quotients are calculated as one's complements,
247 -0x80000000 divided by the smallest positive number of a number
248 range (0x80, 0x8000, 0x800000) causes saturation in the one's
249 complement representation, and we have to suppress the
250 one's -> two's complement adjustment. Since positive numbers
251 don't get such an adjustment, it's OK to also compute one's -> two's
252 complement adjustment suppression for a dividend of 0. */
/* Conditional branches select the table path (div_le128) or the
   div_ge32k path.  bt/s and bf/s are delayed branches: the instruction
   that immediately follows one executes in its delay slot.  */
259 bt/s LOCAL(div_le128)
265 bf/s LOCAL(div_ge32k)
266 cmp/hi r1,r4 ! copy sign bit of r4 into T
268 shll16 r6 ! 7 cycles up to here
274 mov r4,r0 ! re-compute adjusted dividend
280 add r4,r0 ! adjusted dividend
/* Special-case detection for r4 = 0x80000000, r5 = 0x80: r8 is reduced
   to a flag used to decide whether the end adjustment is applied.  */
284 swap.w r4,r8 ! detect special case r4 = 0x80000000, r5 = 0x80
287 xor r1,r0 ! xor dividend with result lsb
292 add #-0x80,r8 ! r8 is 0 iff there is a match
294 swap.w r8,r7 ! or upper 16 bits...
296 or r7,r8 !...into lower 16 bits
304 xor r7,r1 ! replace lsb of result with lsb of dividend
/* "end adjm." below = end adjustment; presumably the one's -> two's
   complement adjustment described above -- confirm.  */
306 neg r8,r8 ! upper 16 bits of r8 are now 0xffff iff we want end adjm.
/* Reload r8 from the stack (post-increment load through r15).  */
312 mov.l @r15+,r8 ! 58 insns, 29 cycles up to here
/* Table path reached from the unsigned entry (judging by the label
   name): mova puts the address of div_table_inv, then of div_table_clz,
   into r0.  */
330 LOCAL(udiv_le128): ! 4 cycles up to here (or 7 for mispredict)
331 mova LOCAL(div_table_inv),r0
334 mova LOCAL(div_table_clz),r0
/* Table path reached from the signed dispatch above; same table
   addressing as udiv_le128, then a branch to le128_neg.  */
355 LOCAL(div_le128): ! 3 cycles up to here (or 6 for mispredict)
356 mova LOCAL(div_table_inv),r0
359 mova LOCAL(div_table_clz),r0
369 bt/s LOCAL(le128_neg)
379 /* Could trap divide by zero for the cost of one cycle more mispredict penalty:
383 bt/s LOCAL(le128_neg)
385 bt LOCAL(div_by_zero)
394 bt LOCAL(div_by_zero)
409 ENDFUNC(GLOBAL(udivsi3_i4i))
410 ENDFUNC(GLOBAL(sdivsi3_i4i))
412 /* This table has been generated by divtab-sh4.c. */
542 LOCAL(div_table_clz):
671 /* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32,
672 or in bit 33 for powers of two. */
802 LOCAL(div_table_inv):
931 /* maximum error: 0.987342; scaled: 0.921875 */
933 #endif /* SH3 / SH4 */
935 #endif /* L_div_table */
936 #endif /* !__SHMEDIA__ */