@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif
/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

#define __PLT__  /* Not supported in Thumb assembler (for now).  */

/* Emit the standard ELF symbol type and size directives for SYM (x).
   SIZE uses ". - SYM (x)", i.e. current location minus the symbol, so it
   must appear at the very end of the function body.  */
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
/* Function end macros.  Variants for 26 bit APCS and interworking.  */

@ This selects the minimum architecture level required.
#define __ARM_ARCH__ 3

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# undef __ARM_ARCH__
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5TE__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 5
#endif

/* How to return from a function call depends on the architecture variant.  */

#ifdef __APCS_26__

/* 26-bit APCS: restore the condition codes along with the PC.  */
# define RET		movs	pc, lr
# define RETc(x)	mov##x##s	pc, lr

#elif (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

/* Architectures with BX: return with an interworking branch.  */
# define RET		bx	lr
# define RETc(x)	bx##x	lr

# if (__ARM_ARCH__ == 4) \
	&& (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#  define __INTERWORKING__
# endif

#else

/* Plain 32-bit return; no interworking possible.  */
# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif
/* Don't pass dirn, it's there just to get token pasting right.  */

/* Pop any saved registers plus the return address pushed by the function
   prologue and return to the caller, using whichever return sequence the
   target variant requires.  \regs = optional extra registers to restore,
   \cond = optional condition-code suffix, \dirn = ldm addressing mode.  */
.macro	RETLDM	regs=, cond=, dirn=ia
#ifdef __APCS_26__
	.ifc "\regs",""
	ldm\cond\dirn	sp!, {pc}^
	.else
	ldm\cond\dirn	sp!, {\regs, pc}^
	.endif
#elif defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #4
	.else
	ldm\cond\dirn	sp!, {\regs, lr}
	.endif
	bx\cond	lr
#else
	.ifc "\regs",""
	ldr\cond	pc, [sp], #4
	.else
	ldm\cond\dirn	sp!, {\regs, pc}
	.endif
#endif
.endm
135 bl SYM (__div0) __PLT__
136 mov r0, #0 @ About as wrong as it could be.
145 mov r0, #0 @ About as wrong as it could be.
146 #if defined (__INTERWORKING__)
158 .macro DIV_FUNC_END name
168 .macro THUMB_FUNC_START name
175 /* Function start macros. Variants for ARM and Thumb. */
178 #define THUMB_FUNC .thumb_func
179 #define THUMB_CODE .force_thumb
185 .macro FUNC_START name
195 /* Special function that will always be coded in ARM assembly, even if
196 in Thumb-only compilation. */
198 #if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
199 .macro ARM_FUNC_START name
204 _L__\name: /* A hook to tell gdb that we've switched to ARM
*/
206 #define EQUIV .thumb_set
208 .
macro ARM_FUNC_START
name
219 .
macro ARM_FUNC_ALIAS new old
221 EQUIV SYM
(__
\new
), SYM
(__\old
)
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?
/* The remaining aliases mirror the r0-r3 argument layout used by the
   ARM-mode bodies (see the ARM_DIV_BODY r0, r1, r2, r3 call sites).
   NOTE(review): reconstructed from a garbled source; verify the list
   against the original lib1funcs.asm.  */
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2	@ shares r2 with result; only the modulo path uses it
result		.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
/* ------------------------------------------------------------------------ */
/*		Bodies of the division and modulo routines.		    */
/* ------------------------------------------------------------------------ */
/* ARM-mode core of the unsigned division routines.
   On entry: \dividend and \divisor hold the operands (\divisor != 0).
   On exit:  \result holds the quotient, \dividend holds the remainder,
   and \curbit is clobbered.  */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5

	@ With CLZ the normalisation shift is computed directly.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four quotient bits per iteration.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm
/* ------------------------------------------------------------------------ */
/* Compute \order = bit position of the single set bit in \divisor.
   Used by the division routines when the divisor is a power of two,
   so the quotient is just \dividend >> \order.  \divisor is clobbered
   on pre-v5 variants.  */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Binary search for the top set bit, halving the range each step.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
/* ------------------------------------------------------------------------ */
/* ARM-mode core of the unsigned modulo routines.
   On entry: \dividend and \divisor hold the operands (\divisor != 0).
   On exit:  \dividend holds the remainder; \order and \spare are
   clobbered scratch registers.  */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm
/* ------------------------------------------------------------------------ */
/* Thumb-1 core of the division and modulo routines.  \modulo is 0 for
   division (quotient accumulated in "result") and 1 for modulo (the
   remainder is left in "dividend" and any over-subtractions from the
   final pass are undone).  Uses the register aliases declared above:
   dividend, divisor, result, curbit, overdone, work.
   NOTE(review): reconstructed from a garbled source; verify instruction
   ordering against the original lib1funcs.asm.  */
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
LSYM (Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM (Lbignum)
	cmp	divisor, dividend
	bhs	LSYM (Lbignum)
	lsl	divisor, #4
	lsl	curbit,  #4
	b	LSYM (Loop1)
LSYM (Lbignum):
	@ Set work to 0x80000000
	lsl	work, #3
LSYM (Loop2):
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM (Loop3)
	cmp	divisor, dividend
	bhs	LSYM (Loop3)
	lsl	divisor, #1
	lsl	curbit,  #1
	b	LSYM (Loop2)
LSYM (Loop3):
	@ Test for possible subtractions ...
  .if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done, we can fix them up
	@ afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	blo	LSYM (Lover1)
	sub	dividend, dividend, divisor
LSYM (Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM (Lover2)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM (Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM (Lover3)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM (Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM (Lover4)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM (Lover4):
	mov	ip, curbit
  .else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be
	@ ok, since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM (Lover1)
	sub	dividend, dividend, divisor
	orr	result, result, curbit
LSYM (Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM (Lover2)
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
LSYM (Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM (Lover3)
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
LSYM (Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM (Lover4)
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
LSYM (Lover4):
  .endif

	cmp	dividend, #0			@ Early termination?
	beq	LSYM (Lover5)
	lsr	curbit,  #4			@ No, any more bits to do?
	beq	LSYM (Lover5)
	lsr	divisor, #4
	b	LSYM (Loop3)
LSYM (Lover5):
  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	LSYM (Lgot_result)

	@ If we terminated early, because dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits which might then match
	@ with one of the smaller RORs.
	mov	curbit, ip
	mov	work, #0x7
	tst	curbit, work
	beq	LSYM (Lgot_result)

	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM (Lover6)
	lsr	work, divisor, #3
	add	dividend, work
LSYM (Lover6):
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM (Lover7)
	lsr	work, divisor, #2
	add	dividend, work
LSYM (Lover7):
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM (Lgot_result)
	lsr	work, divisor, #1
	add	dividend, work
  .endif
LSYM (Lgot_result):
.endm
565 /* ------------------------------------------------------------------------ */
566 /* Start of the Real Functions
*/
567 /* ------------------------------------------------------------------------ */
580 cmp dividend
, divisor
581 blo LSYM
(Lgot_result
)
589 #else
/* ARM version.
*/
599 ARM_DIV_BODY r0
, r1
, r2
, r3
608 12: ARM_DIV2_ORDER r1
, r2
613 #endif
/* ARM version
*/
617 #endif
/* L_udivsi3
*/
618 /* ------------------------------------------------------------------------ */
628 cmp dividend
, divisor
640 #else
/* ARM version.
*/
642 subs r2
, r1
, #
1 @ compare divisor with
1
644 cmpne r0
, r1 @ compare dividend with divisor
646 tsthi r1
, r2 @ see if divisor is power of
2
650 ARM_MOD_BODY r0
, r1
, r2
, r3
654 #endif
/* ARM version.
*/
658 #endif
/* L_umodsi3
*/
659 /* ------------------------------------------------------------------------ */
670 eor work
, divisor @ Save the sign of the result.
676 neg divisor
, divisor @ Loops below use unsigned.
680 neg dividend
, dividend
682 cmp dividend
, divisor
683 blo LSYM
(Lgot_result
)
696 #else
/* ARM version.
*/
699 eor ip
, r0
, r1 @ save the sign of the result.
701 rsbmi r1
, r1
, #
0 @ loops below use unsigned.
702 subs r2
, r1
, #
1 @ division by
1 or -1 ?
705 rsbmi r3
, r0
, #
0 @ positive dividend value
708 tst r1
, r2 @ divisor is power of
2 ?
711 ARM_DIV_BODY r3
, r1
, r0
, r2
717 10: teq ip
, r0 @ same sign
?
722 moveq r0
, ip
, asr #
31
726 12: ARM_DIV2_ORDER r1
, r2
733 #endif
/* ARM version
*/
737 #endif
/* L_divsi3
*/
738 /* ------------------------------------------------------------------------ */
749 neg divisor
, divisor @ Loops below use unsigned.
	@ Need to save the sign of the dividend, unfortunately, we need
	@ work later on.  Must do this after saving the original value of
	@ the work register, because we will pop this value off first.
758 neg dividend
, dividend
760 cmp dividend
, divisor
761 blo LSYM
(Lgot_result
)
768 neg dividend
, dividend
773 #else
/* ARM version.
*/
777 rsbmi r1
, r1
, #
0 @ loops below use unsigned.
778 movs ip
, r0 @ preserve sign of dividend
779 rsbmi r0
, r0
, #
0 @ if negative make positive
780 subs r2
, r1
, #
1 @ compare divisor with
1
781 cmpne r0
, r1 @ compare dividend with divisor
783 tsthi r1
, r2 @ see if divisor is power of
2
787 ARM_MOD_BODY r0
, r1
, r2
, r3
793 #endif
/* ARM version
*/
797 #endif
/* L_modsi3
*/
798 /* ------------------------------------------------------------------------ */
807 #endif
/* L_divmodsi_tools
*/
808 /* ------------------------------------------------------------------------ */
@ GNU/Linux division-by zero handler.  Used in place of L_dvmd_tls

/* Constants taken from <asm/unistd.h> and <asm/signal.h> */
#define SIGFPE	8
#define __NR_SYSCALL_BASE	0x900000
#define __NR_getpid		(__NR_SYSCALL_BASE+ 20)
#define __NR_kill		(__NR_SYSCALL_BASE+ 37)
831 #endif
/* L_dvmd_lnx
*/
832 /* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */
/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
839 #if defined L_call_via_rX
&& (defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__
)
/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */
851 .
macro call_via register
852 THUMB_FUNC_START _call_via_
\register
857 SIZE (_call_via_
\register
)
876 #endif
/* L_call_via_rX
*/
877 /* ------------------------------------------------------------------------ */
/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
880 #if defined L_interwork_call_via_rX
&& (defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__
)
/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.  */
902 .
macro interwork register
905 THUMB_FUNC_START _interwork_call_via_
\register
911 .globl LSYM
(Lchange_
\register
)
912 LSYM
(Lchange_
\register
):
915 adreq lr
, _arm_return
918 SIZE (_interwork_call_via_
\register
)
/* The LR case has to be handled a little differently...  */
939 THUMB_FUNC_START _interwork_call_via_lr
950 adreq lr
, _arm_return
953 SIZE (_interwork_call_via_lr
)
955 #endif
/* L_interwork_call_via_rX
*/
/* Pull in the soft-float IEEE-754 double and single precision bodies.  */
#include "ieee754-df.S"
#include "ieee754-sf.S"