@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright 1995, 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
29 /* ------------------------------------------------------------------------ */
/* We need to know what prefix to add to function names.  */
/* This file is assembled via the C preprocessor, so the user-label
   prefix must be supplied on the command line; fail loudly if not.  */
#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif
/* ANSI concatenation macros.  */
/* CONCAT1 expands its arguments (so __USER_LABEL_PREFIX__ is replaced by
   its value) before CONCAT2 pastes the resulting tokens together.  */
#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b
/* Use the right prefix for global labels.  */
/* SYM(x) prepends the platform's user-label prefix to symbol x.  */
#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
/* Mark the (prefixed) symbol as a function for the ELF symbol table.  */
#define TYPE(x) .type SYM(x),function
/* Record the symbol's size as current location (.) minus its start.  */
#define SIZE(x) .size SYM(x), . - SYM(x)
/* Function end macros.  Variants for 26 bit APCS and interworking.  */
@ This selects the minimum architecture level required.
/* Baseline architecture level; raised below if a later -march macro
   is defined by the compiler driver.  */
#define __ARM_ARCH__ 3
#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# undef __ARM_ARCH__
# define __ARM_ARCH__ 4
#endif
#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5TE__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 5
#endif
/* How to return from a function call depends on the architecture variant.  */
85 # define RET movs pc, lr
86 # define RETc(x) mov##x##s pc, lr
88 #elif (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
91 # define RETc(x) bx##x lr
93 # if (__ARM_ARCH__ == 4) \
94 && (defined(__thumb__) || defined(__THUMB_INTERWORK__))
95 # define __INTERWORKING__
100 # define RET mov pc, lr
101 # define RETc(x) mov##x pc, lr
/* Don't pass dirn, it's there just to get token pasting right.  */
107 .macro RETLDM regs=, cond=, dirn=ia
110 ldm\cond\dirn sp!, {pc}^
112 ldm\cond\dirn sp!, {\regs, pc}^
114 #elif defined (__INTERWORKING__)
116 ldr\cond lr, [sp], #4
118 ldm\cond\dirn sp!, {\regs, lr}
123 ldr\cond pc, [sp], #4
125 ldm\cond\dirn sp!, {\regs, pc}
134 bl SYM (__div0) __PLT__
135 mov r0, #0 @ About as wrong as it could be.
144 mov r0, #0 @ About as wrong as it could be.
145 #if defined (__INTERWORKING__)
157 .macro DIV_FUNC_END name
167 .macro THUMB_FUNC_START name
/* Function start macros.  Variants for ARM and Thumb.  */
/* Directives marking Thumb entry points for the assembler.  */
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
184 .macro FUNC_START name
/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */
197 #if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
198 .macro ARM_FUNC_START name
203 _L__\name: /* A hook to tell gdb that we've switched to ARM
*/
206 .
macro ARM_FUNC_START
name
/* Register aliases.  */
work		.req	r4	@ XXXX is this safe ?
/* ------------------------------------------------------------------------ */
/* Bodies of the division and modulo routines.  */
/* ------------------------------------------------------------------------ */
231 .
macro ARM_DIV_BODY dividend
, divisor
, result
, curbit
233 #if __ARM_ARCH__
>= 5
235 clz \curbit
, \divisor
236 clz
\result
, \dividend
237 sub \result
, \curbit
, \result
239 mov \divisor
, \divisor
, lsl \result
240 mov \curbit
, \curbit
, lsl \result
245 @ Initially shift the divisor left
3 bits if possible
,
246 @ set curbit accordingly.
This allows for curbit to be located
247 @ at the left
end of each
4 bit nibbles
in the division
loop
248 @ to save one
loop in most cases.
249 tst \divisor
, #
0xe0000000
250 moveq \divisor
, \divisor
, lsl #
3
254 @ Unless the divisor is very big
, shift it up
in multiples of
255 @ four bits
, since
this is the amount of unwinding
in the main
256 @ division
loop. Continue shifting until the divisor is
257 @ larger than the dividend.
258 1: cmp \divisor
, #
0x10000000
259 cmplo \divisor
, \dividend
260 movlo \divisor
, \divisor
, lsl #
4
261 movlo \curbit
, \curbit
, lsl #
4
264 @ For very big divisors
, we must shift it a bit at a time
, or
265 @ we will be
in danger of overflowing.
266 1: cmp \divisor
, #
0x80000000
267 cmplo \divisor
, \dividend
268 movlo \divisor
, \divisor
, lsl #
1
269 movlo \curbit
, \curbit
, lsl #
1
277 1: cmp \dividend
, \divisor
278 subhs \dividend
, \dividend
, \divisor
279 orrhs
\result
, \result
, \curbit
280 cmp \dividend
, \divisor
, lsr #
1
281 subhs \dividend
, \dividend
, \divisor
, lsr #
1
282 orrhs
\result
, \result
, \curbit
, lsr #
1
283 cmp \dividend
, \divisor
, lsr #
2
284 subhs \dividend
, \dividend
, \divisor
, lsr #
2
285 orrhs
\result
, \result
, \curbit
, lsr #
2
286 cmp \dividend
, \divisor
, lsr #
3
287 subhs \dividend
, \dividend
, \divisor
, lsr #
3
288 orrhs
\result
, \result
, \curbit
, lsr #
3
289 cmp \dividend
, #
0 @ Early termination
?
290 movnes \curbit
, \curbit
, lsr #
4 @ No
, any more bits to do
?
291 movne \divisor
, \divisor
, lsr #
4
295 /* ------------------------------------------------------------------------ */
296 .
macro ARM_DIV2_ORDER divisor
, order
298 #if __ARM_ARCH__
>= 5
301 rsb \order
, \order
, #
31
305 cmp \divisor
, #
(1 << 16)
306 movhs \divisor
, \divisor
, lsr #
16
310 cmp \divisor
, #
(1 << 8)
311 movhs \divisor
, \divisor
, lsr #
8
312 addhs \order
, \order
, #
8
314 cmp \divisor
, #
(1 << 4)
315 movhs \divisor
, \divisor
, lsr #
4
316 addhs \order
, \order
, #
4
318 cmp \divisor
, #
(1 << 2)
319 addhi \order
, \order
, #
3
320 addls \order
, \order
, \divisor
, lsr #
1
325 /* ------------------------------------------------------------------------ */
326 .
macro ARM_MOD_BODY dividend
, divisor
, order
, spare
328 #if __ARM_ARCH__
>= 5
331 clz \spare
, \dividend
332 sub \order
, \order
, \spare
333 mov \divisor
, \divisor
, lsl \order
339 @ Unless the divisor is very big
, shift it up
in multiples of
340 @ four bits
, since
this is the amount of unwinding
in the main
341 @ division
loop. Continue shifting until the divisor is
342 @ larger than the dividend.
343 1: cmp \divisor
, #
0x10000000
344 cmplo \divisor
, \dividend
345 movlo \divisor
, \divisor
, lsl #
4
346 addlo \order
, \order
, #
4
349 @ For very big divisors
, we must shift it a bit at a time
, or
350 @ we will be
in danger of overflowing.
351 1: cmp \divisor
, #
0x80000000
352 cmplo \divisor
, \dividend
353 movlo \divisor
, \divisor
, lsl #
1
354 addlo \order
, \order
, #
1
359 @ Perform all needed substractions to keep only the reminder.
360 @ Do comparisons
in batch of
4 first.
361 subs \order
, \order
, #
3 @ yes
, 3 is intended here
364 1: cmp \dividend
, \divisor
365 subhs \dividend
, \dividend
, \divisor
366 cmp \dividend
, \divisor
, lsr #
1
367 subhs \dividend
, \dividend
, \divisor
, lsr #
1
368 cmp \dividend
, \divisor
, lsr #
2
369 subhs \dividend
, \dividend
, \divisor
, lsr #
2
370 cmp \dividend
, \divisor
, lsr #
3
371 subhs \dividend
, \dividend
, \divisor
, lsr #
3
373 mov \divisor
, \divisor
, lsr #
4
374 subges \order
, \order
, #
4
381 @ Either
1, 2 or 3 comparison
/substractions are left.
385 cmp \dividend
, \divisor
386 subhs \dividend
, \dividend
, \divisor
387 mov \divisor
, \divisor
, lsr #
1
388 3: cmp \dividend
, \divisor
389 subhs \dividend
, \dividend
, \divisor
390 mov \divisor
, \divisor
, lsr #
1
391 4: cmp \dividend
, \divisor
392 subhs \dividend
, \dividend
, \divisor
395 /* ------------------------------------------------------------------------ */
396 .
macro THUMB_DIV_MOD_BODY modulo
397 @ Load the constant
0x10000000 into our work register.
401 @ Unless the divisor is very big
, shift it up
in multiples of
402 @ four bits
, since
this is the amount of unwinding
in the main
403 @ division
loop. Continue shifting until the divisor is
404 @ larger than the dividend.
407 cmp divisor
, dividend
413 @ Set work to
0x80000000
416 @ For very big divisors
, we must shift it a bit at a time
, or
417 @ we will be
in danger of overflowing.
420 cmp divisor
, dividend
426 @
Test for possible subtractions ...
428 @ ... On the final pass
, this may subtract too much from the dividend
,
429 @ so keep track of which subtractions are done
, we can fix them up
432 cmp dividend
, divisor
434 sub dividend
, dividend
, divisor
436 lsr work
, divisor
, #
1
439 sub dividend
, dividend
, work
446 lsr work
, divisor
, #
2
449 sub dividend
, dividend
, work
456 lsr work
, divisor
, #
3
459 sub dividend
, dividend
, work
468 @ ...
and note which bits are done
in the result. On the final pass
,
469 @
this may subtract too much from the dividend
, but the result will be ok
,
470 @ since the
"bit" will have been shifted
out at the bottom.
471 cmp dividend
, divisor
473 sub dividend
, dividend
, divisor
474 orr result
, result
, curbit
476 lsr work
, divisor
, #
1
479 sub dividend
, dividend
, work
483 lsr work
, divisor
, #
2
486 sub dividend
, dividend
, work
490 lsr work
, divisor
, #
3
493 sub dividend
, dividend
, work
499 cmp dividend
, #
0 @ Early termination
?
501 lsr curbit
, #
4 @ No
, any more bits to do
?
507 @ Any subtractions that we should
not have done will be recorded
in
508 @ the top three bits of
"overdone". Exactly which were
not needed
509 @ are governed by the position of the bit
, stored
in ip.
513 beq LSYM
(Lgot_result
)
515 @ If we terminated early
, because dividend became zero
, then the
516 @ bit
in ip will
not be
in the bottom nibble
, and we should
not
517 @ perform the additions below. We must
test for
this though
518 @
(rather relying upon the TSTs to prevent the additions
) since
519 @ the bit
in ip could be
in the top two bits which might then match
520 @ with one of the smaller RORs.
524 beq LSYM
(Lgot_result
)
531 lsr work
, divisor
, #
3
539 lsr work
, divisor
, #
2
546 beq LSYM
(Lgot_result
)
547 lsr work
, divisor
, #
1
/* ------------------------------------------------------------------------ */
/* Start of the Real Functions */
/* ------------------------------------------------------------------------ */
567 cmp dividend
, divisor
568 blo LSYM
(Lgot_result
)
576 #else
/* ARM version.
*/
586 ARM_DIV_BODY r0
, r1
, r2
, r3
595 12: ARM_DIV2_ORDER r1
, r2
600 #endif
/* ARM version
*/
604 #endif
/* L_udivsi3
*/
605 /* ------------------------------------------------------------------------ */
615 cmp dividend
, divisor
627 #else
/* ARM version.
*/
629 subs r2
, r1
, #
1 @ compare divisor with
1
631 cmpne r0
, r1 @ compare dividend with divisor
633 tsthi r1
, r2 @ see if divisor is power of
2
637 ARM_MOD_BODY r0
, r1
, r2
, r3
641 #endif
/* ARM version.
*/
645 #endif
/* L_umodsi3
*/
646 /* ------------------------------------------------------------------------ */
657 eor work
, divisor @ Save the sign of the result.
663 neg divisor
, divisor @ Loops below use unsigned.
667 neg dividend
, dividend
669 cmp dividend
, divisor
670 blo LSYM
(Lgot_result
)
683 #else
/* ARM version.
*/
686 eor ip
, r0
, r1 @ save the sign of the result.
688 rsbmi r1
, r1
, #
0 @ loops below use unsigned.
689 subs r2
, r1
, #
1 @ division by
1 or -1 ?
692 rsbmi r3
, r0
, #
0 @ positive dividend value
695 tst r1
, r2 @ divisor is power of
2 ?
698 ARM_DIV_BODY r3
, r1
, r0
, r2
704 10: teq ip
, r0 @ same sign
?
709 moveq r0
, ip
, asr #
31
713 12: ARM_DIV2_ORDER r1
, r2
720 #endif
/* ARM version
*/
724 #endif
/* L_divsi3
*/
725 /* ------------------------------------------------------------------------ */
736 neg divisor
, divisor @ Loops below use unsigned.
739 @ Need to save the sign of the dividend
, unfortunately
, we need
740 @ work later on. Must do
this after saving the original value of
741 @ the work register
, because we will
pop this value off first.
745 neg dividend
, dividend
747 cmp dividend
, divisor
748 blo LSYM
(Lgot_result
)
755 neg dividend
, dividend
760 #else
/* ARM version.
*/
764 rsbmi r1
, r1
, #
0 @ loops below use unsigned.
765 movs ip
, r0 @ preserve sign of dividend
766 rsbmi r0
, r0
, #
0 @ if negative make positive
767 subs r2
, r1
, #
1 @ compare divisor with
1
768 cmpne r0
, r1 @ compare dividend with divisor
770 tsthi r1
, r2 @ see if divisor is power of
2
774 ARM_MOD_BODY r0
, r1
, r2
, r3
780 #endif
/* ARM version
*/
784 #endif
/* L_modsi3
*/
785 /* ------------------------------------------------------------------------ */
794 #endif
/* L_divmodsi_tools
*/
795 /* ------------------------------------------------------------------------ */
@ GNU/Linux division-by zero handler.  Used in place of L_dvmd_tls
/* Constants taken from <asm/unistd.h> and <asm/signal.h> */
/* ARM Linux syscall numbers, offset from the swi base.  */
#define __NR_SYSCALL_BASE	0x900000
#define __NR_getpid		(__NR_SYSCALL_BASE + 20)
#define __NR_kill		(__NR_SYSCALL_BASE + 37)
818 #endif
/* L_dvmd_lnx
*/
819 /* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */
/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined L_call_via_rX && (defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__)
/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */
838 .
macro call_via register
839 THUMB_FUNC_START _call_via_
\register
844 SIZE (_call_via_
\register
)
863 #endif
/* L_call_via_rX
*/
864 /* ------------------------------------------------------------------------ */
/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined L_interwork_call_via_rX && (defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__)
/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.  */
889 .
macro interwork register
892 THUMB_FUNC_START _interwork_call_via_
\register
898 .globl LSYM
(Lchange_
\register
)
899 LSYM
(Lchange_
\register
):
902 adreq lr
, _arm_return
905 SIZE (_interwork_call_via_
\register
)
/* The LR case has to be handled a little differently...  */
926 THUMB_FUNC_START _interwork_call_via_lr
937 adreq lr
, _arm_return
940 SIZE (_interwork_call_via_lr
)
942 #endif
/* L_interwork_call_via_rX
*/
#include "ieee754-df.S"
#include "ieee754-sf.S"