@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */

/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
.previous
#endif

/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
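/* For illustration: on a target whose __USER_LABEL_PREFIX__ is `_',
   SYM (udivsi3) pastes to _udivsi3; with an empty prefix it is plain
   udivsi3.  The same prefixing is applied to every global label below.  */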
#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif

/* Function end macros.  Variants for interworking.  */

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
        || defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
        || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
        || defined(__ARM_ARCH_5TEJ__)
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
        || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
        || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)
# define __ARM_ARCH__ 6
#endif

#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
        || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__)
# define __ARM_ARCH__ 7
#endif

#ifndef __ARM_ARCH__
#error Unable to determine architecture.
#endif

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET     bx lr
# define RETc(x) bx##x lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

#else

# define RET     mov pc, lr
# define RETc(x) mov##x pc, lr

#endif
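/* Illustration of the macros above: on v4T and later cores `RETc(eq)'
   assembles to `bxeq lr', while on older cores it becomes `moveq pc, lr'.  */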
.macro cfi_pop advance, reg, cfa_offset
#ifdef __ELF__
        .pushsection .debug_frame
        .byte 0x4              /* DW_CFA_advance_loc4 */
        .4byte \advance
        .byte (0xc0 | \reg)    /* DW_CFA_restore */
        .byte 0xe              /* DW_CFA_def_cfa_offset */
        .uleb128 \cfa_offset
        .popsection
#endif
.endm
.macro cfi_push advance, reg, offset, cfa_offset
#ifdef __ELF__
        .pushsection .debug_frame
        .byte 0x4              /* DW_CFA_advance_loc4 */
        .4byte \advance
        .byte (0x80 | \reg)    /* DW_CFA_offset */
        .uleb128 (\offset / -4)
        .byte 0xe              /* DW_CFA_def_cfa_offset */
        .uleb128 \cfa_offset
        .popsection
#endif
.endm
.macro cfi_start start_label, end_label
#ifdef __ELF__
        .pushsection .debug_frame
LSYM(Lstart_frame):
        .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
        .4byte 0xffffffff      @ CIE Identifier Tag
        .byte 0x1              @ CIE Version
        .ascii "\0"            @ CIE Augmentation
        .uleb128 0x1           @ CIE Code Alignment Factor
        .sleb128 -4            @ CIE Data Alignment Factor
        .byte 0xe              @ CIE RA Column
        .byte 0xc              @ DW_CFA_def_cfa
        .uleb128 0xd
        .uleb128 0x0

        .align 2
LSYM(Lend_cie):
        .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length
LSYM(Lstart_fde):
        .4byte LSYM(Lstart_frame)      @ FDE CIE offset
        .4byte \start_label            @ FDE initial location
        .4byte \end_label-\start_label @ FDE address range
        .popsection
#endif
.endm
.macro cfi_end end_label
#ifdef __ELF__
        .pushsection .debug_frame
        .align 2
LSYM(Lend_fde):
        .popsection
\end_label:
#endif
.endm
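/* Sketch of how these macros cooperate (see the div0 paths below):
   cfi_start opens a CIE/FDE pair for a region, cfi_push records that LR
   was saved on the stack (e.g. after the `str lr, [sp, #-8]!' in
   ARM_LDIV0), cfi_pop marks LR as restored again inside RETLDM, and
   cfi_end closes the FDE.  */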
/* Don't pass dirn, it's there just to get token pasting right.  */

.macro RETLDM regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
        .ifc "\regs",""
        ldr\cond lr, [sp], #8
        .else
# if defined(__thumb2__)
        pop\cond {\regs, lr}
# else
        ldm\cond\dirn sp!, {\regs, lr}
# endif
        .endif
        .ifnc "\unwind", ""
        /* Mark LR as restored.  */
97:     cfi_pop 97b - \unwind, 0xe, 0x0
        .endif
        bx\cond lr
#else
        /* Caller is responsible for providing IT instruction.  */
        .ifc "\regs",""
        ldr\cond pc, [sp], #8
        .else
# if defined(__thumb2__)
        pop\cond {\regs, pc}
# else
        ldm\cond\dirn sp!, {\regs, pc}
# endif
        .endif
#endif
.endm
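/* For example, `RETLDM "r4, r5"' pops {r4, r5, lr} and returns with
   `bx lr' when __INTERWORKING__ is defined, and pops straight into
   {r4, r5, pc} otherwise; with an empty register list it reloads LR
   (or PC) from [sp], #8.  */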
/* The Unified assembly syntax allows the same code to be assembled for both
   ARM and Thumb-2.  However this is only supported by recent gas, so define
   a set of macros to allow ARM code on older assemblers.  */
#if defined(__thumb2__)
.macro do_it cond, suffix=""
        it\suffix \cond
.endm
.macro shift1 op, arg0, arg1, arg2
        \op \arg0, \arg1, \arg2
.endm
#define do_push push
#define do_pop pop
#define COND(op1, op2, cond) op1 ## op2 ## cond
/* Perform an arithmetic operation with a variable shift operand.  This
   requires two instructions and a scratch register on Thumb-2.  */
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
        \shiftop \tmp, \src2, \shiftreg
        \name \dest, \src1, \tmp
.endm
#else
.macro do_it cond, suffix=""
.endm
.macro shift1 op, arg0, arg1, arg2
        mov \arg0, \arg1, \op \arg2
.endm
#define do_push stmfd sp!,
#define do_pop ldmfd sp!,
#define COND(op1, op2, cond) op1 ## cond ## op2
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
        \name \dest, \src1, \src2, \shiftop \shiftreg
.endm
#endif
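/* Example expansion of shiftop: `shiftop orr, r0, r0, r1, lsl, r2, ip'
   becomes the single instruction `orr r0, r0, r1, lsl r2' in ARM mode,
   but `lsl ip, r1, r2' followed by `orr r0, r0, ip' under Thumb-2.  */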
.macro ARM_LDIV0 name
        str lr, [sp, #-8]!
98:     cfi_push 98b - __\name, 0xe, -0x8, 0x8
        bl SYM (__div0) __PLT__
        mov r0, #0     @ About as wrong as it could be.
        RETLDM unwind=98b
.endm


.macro THUMB_LDIV0 name
        push { r1, lr }
98:     cfi_push 98b - __\name, 0xe, -0x4, 0x8
        bl SYM (__div0)
        mov r0, #0     @ About as wrong as it could be.
#if defined (__INTERWORKING__)
        pop { r1, r2 }
        bx r2
#else
        pop { r1, pc }
#endif
.endm

.macro FUNC_END name
        SIZE (__\name)
.endm

.macro DIV_FUNC_END name
        cfi_start __\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
        THUMB_LDIV0 \name
#else
        ARM_LDIV0 \name
#endif
        cfi_end LSYM(Lend_div0)
        FUNC_END \name
.endm
.macro THUMB_FUNC_START name
        .globl SYM (\name)
        TYPE (\name)
        .thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
# if defined(__thumb2__)
#define THUMB_SYNTAX .syntax divided
# else
#define THUMB_SYNTAX
# endif
#else
#define THUMB_FUNC
#define THUMB_CODE
#define THUMB_SYNTAX
#endif

.macro FUNC_START name
        .text
        .globl SYM (__\name)
        TYPE (__\name)
        .align 0
        THUMB_CODE
        THUMB_FUNC
        THUMB_SYNTAX
SYM (__\name):
.endm
/* Special function that will always be coded in ARM assembly, even when
   compiling in Thumb-only mode.  */
#if defined(__thumb2__)

/* For Thumb-2 we build everything in thumb mode.  */
.macro ARM_FUNC_START name
        FUNC_START \name
        .syntax unified
.endm
#define EQUIV .thumb_set
.macro ARM_CALL name
        bl __\name
.endm
#elif defined(__INTERWORKING_STUBS__)

.macro ARM_FUNC_START name
        FUNC_START \name
        bx pc
        nop
        .arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro ARM_CALL name
        bl _L__\name
.endm
#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */

.macro ARM_FUNC_START name
        .text
        .globl SYM (__\name)
        TYPE (__\name)
        .align 0
        .arm
SYM (__\name):
.endm
#define EQUIV .set
.macro ARM_CALL name
        bl __\name
.endm

#endif

.macro FUNC_ALIAS new old
        .globl SYM (__\new)
#if defined (__thumb__)
        .thumb_set SYM (__\new), SYM (__\old)
#else
        .set SYM (__\new), SYM (__\old)
#endif
.endm
.macro ARM_FUNC_ALIAS new old
        .globl SYM (__\new)
        EQUIV SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
        .set SYM (_L__\new), SYM (_L__\old)
#endif
.endm
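/* For example, `FUNC_ALIAS aeabi_uidiv udivsi3' below makes __aeabi_uidiv
   a second global name for __udivsi3; ARM_FUNC_ALIAS additionally aliases
   the _L__ entry point when interworking stubs are in use.  */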
#ifdef __thumb__
/* Register aliases.  */

work            .req    r4      @ XXXX is this safe ?
dividend        .req    r0
divisor         .req    r1
overdone        .req    r2
result          .req    r2
curbit          .req    r3
#endif
#if 0
ip              .req    r12
sp              .req    r13
lr              .req    r14
pc              .req    r15
#endif
/* ------------------------------------------------------------------------ */
/* Bodies of the division and modulo routines.  */
/* ------------------------------------------------------------------------ */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

        clz \curbit, \dividend
        clz \result, \divisor
        sub \curbit, \result, \curbit
        rsbs \curbit, \curbit, #31
        addne \curbit, \curbit, \curbit, lsl #1
        mov \result, #0
        addne pc, pc, \curbit, lsl #2
        nop
        .set shift, 32
        .rept 32
        .set shift, shift - 1
        cmp \dividend, \divisor, lsl #shift
        adc \result, \result, \result
        subcs \dividend, \dividend, \divisor, lsl #shift
        .endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

        clz \curbit, \divisor
        clz \result, \dividend
        sub \result, \curbit, \result
        mov \curbit, #1
        mov \divisor, \divisor, lsl \result
        mov \curbit, \curbit, lsl \result
        mov \result, #0

#else /* __ARM_ARCH__ < 5 */

        @ Initially shift the divisor left 3 bits if possible,
        @ set curbit accordingly.  This allows for curbit to be located
        @ at the left end of each 4-bit nibble in the division loop
        @ to save one loop iteration in most cases.
        tst \divisor, #0xe0000000
        moveq \divisor, \divisor, lsl #3
        moveq \curbit, #8
        movne \curbit, #1

        @ Unless the divisor is very big, shift it up in multiples of
        @ four bits, since this is the amount of unwinding in the main
        @ division loop.  Continue shifting until the divisor is
        @ larger than the dividend.
1:      cmp \divisor, #0x10000000
        cmplo \divisor, \dividend
        movlo \divisor, \divisor, lsl #4
        movlo \curbit, \curbit, lsl #4
        blo 1b
        @ For a very big divisor, we must shift it one bit at a time, or
        @ we will be in danger of overflowing.
1:      cmp \divisor, #0x80000000
        cmplo \divisor, \dividend
        movlo \divisor, \divisor, lsl #1
        movlo \curbit, \curbit, lsl #1
        blo 1b

        mov \result, #0

#endif /* __ARM_ARCH__ < 5 */

        @ Division loop
1:      cmp \dividend, \divisor
        subhs \dividend, \dividend, \divisor
        orrhs \result, \result, \curbit
        cmp \dividend, \divisor, lsr #1
        subhs \dividend, \dividend, \divisor, lsr #1
        orrhs \result, \result, \curbit, lsr #1
        cmp \dividend, \divisor, lsr #2
        subhs \dividend, \dividend, \divisor, lsr #2
        orrhs \result, \result, \curbit, lsr #2
        cmp \dividend, \divisor, lsr #3
        subhs \dividend, \dividend, \divisor, lsr #3
        orrhs \result, \result, \curbit, lsr #3
        cmp \dividend, #0              @ Early termination?
        movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
        movne \divisor, \divisor, lsr #4
        bne 1b

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
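/* Rough C model of the shift-and-subtract division that ARM_DIV_BODY
   performs (illustration only, never assembled): the divisor is first
   aligned under the dividend, then one quotient bit is produced per
   position.  The 4-way unrolled loop above is exactly this, four bits
   at a time.

       unsigned quot = 0, bit = 1;
       while (divisor < dividend && !(divisor & 0x80000000))
         { divisor <<= 1; bit <<= 1; }
       while (bit)
         {
           if (dividend >= divisor)
             { dividend -= divisor; quot |= bit; }
           divisor >>= 1; bit >>= 1;
         }

   On exit quot is the quotient and dividend holds the remainder.  */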
/* ------------------------------------------------------------------------ */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

        clz \order, \divisor
        rsb \order, \order, #31

#else

        cmp \divisor, #(1 << 16)
        movhs \divisor, \divisor, lsr #16
        movhs \order, #16
        movlo \order, #0

        cmp \divisor, #(1 << 8)
        movhs \divisor, \divisor, lsr #8
        addhs \order, \order, #8

        cmp \divisor, #(1 << 4)
        movhs \divisor, \divisor, lsr #4
        addhs \order, \order, #4

        cmp \divisor, #(1 << 2)
        addhi \order, \order, #3
        addls \order, \order, \divisor, lsr #1

#endif

.endm
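/* ARM_DIV2_ORDER leaves in \order the bit position of the power-of-two
   divisor, i.e. floor(log2(divisor)); for example a divisor of 0x800
   yields 11, and the power-of-two paths below then simply return
   dividend >> \order.  */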
/* ------------------------------------------------------------------------ */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

        clz \order, \divisor
        clz \spare, \dividend
        sub \order, \order, \spare
        rsbs \order, \order, #31
        addne pc, pc, \order, lsl #3
        nop
        .set shift, 32
        .rept 32
        .set shift, shift - 1
        cmp \dividend, \divisor, lsl #shift
        subcs \dividend, \dividend, \divisor, lsl #shift
        .endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

        clz \order, \divisor
        clz \spare, \dividend
        sub \order, \order, \spare
        mov \divisor, \divisor, lsl \order

#else /* __ARM_ARCH__ < 5 */

        mov \order, #0

        @ Unless the divisor is very big, shift it up in multiples of
        @ four bits, since this is the amount of unwinding in the main
        @ division loop.  Continue shifting until the divisor is
        @ larger than the dividend.
1:      cmp \divisor, #0x10000000
        cmplo \divisor, \dividend
        movlo \divisor, \divisor, lsl #4
        addlo \order, \order, #4
        blo 1b
        @ For a very big divisor, we must shift it one bit at a time, or
        @ we will be in danger of overflowing.
1:      cmp \divisor, #0x80000000
        cmplo \divisor, \dividend
        movlo \divisor, \divisor, lsl #1
        addlo \order, \order, #1
        blo 1b

#endif /* __ARM_ARCH__ < 5 */
        @ Perform all needed subtractions to keep only the remainder.
        @ Do comparisons in batches of 4 first.
        subs \order, \order, #3        @ yes, 3 is intended here
        blt 2f

1:      cmp \dividend, \divisor
        subhs \dividend, \dividend, \divisor
        cmp \dividend, \divisor, lsr #1
        subhs \dividend, \dividend, \divisor, lsr #1
        cmp \dividend, \divisor, lsr #2
        subhs \dividend, \dividend, \divisor, lsr #2
        cmp \dividend, \divisor, lsr #3
        subhs \dividend, \dividend, \divisor, lsr #3
        cmp \dividend, #1
        mov \divisor, \divisor, lsr #4
        subges \order, \order, #4
        bge 1b

        tst \order, #3
        teqne \dividend, #0
        beq 5f
        @ Either 1, 2 or 3 comparison/subtractions are left.
2:      cmn \order, #2
        blt 4f
        beq 3f
        cmp \dividend, \divisor
        subhs \dividend, \dividend, \divisor
        mov \divisor, \divisor, lsr #1
3:      cmp \dividend, \divisor
        subhs \dividend, \dividend, \divisor
        mov \divisor, \divisor, lsr #1
4:      cmp \dividend, \divisor
        subhs \dividend, \dividend, \divisor

5:
#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
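/* Worked example for ARM_MOD_BODY (illustrative): for 23 mod 5 the
   divisor is first scaled up to 20 (order 2); the compare/subtract steps
   then remove 20, leaving 3, neither 10 nor 5 fits any more, and
   \dividend ends up holding the remainder 3.  */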
/* ------------------------------------------------------------------------ */
.macro THUMB_DIV_MOD_BODY modulo
        @ Load the constant 0x10000000 into our work register.
        mov work, #1
        lsl work, #28
LSYM(Loop1):
        @ Unless the divisor is very big, shift it up in multiples of
        @ four bits, since this is the amount of unwinding in the main
        @ division loop.  Continue shifting until the divisor is
        @ larger than the dividend.
        cmp divisor, work
        bhs LSYM(Lbignum)
        cmp divisor, dividend
        bhs LSYM(Lbignum)
        lsl divisor, #4
        lsl curbit, #4
        b LSYM(Loop1)
LSYM(Lbignum):
        @ Set work to 0x80000000
        lsl work, #3
LSYM(Loop2):
        @ For a very big divisor, we must shift it one bit at a time, or
        @ we will be in danger of overflowing.
        cmp divisor, work
        bhs LSYM(Loop3)
        cmp divisor, dividend
        bhs LSYM(Loop3)
        lsl divisor, #1
        lsl curbit, #1
        b LSYM(Loop2)
LSYM(Loop3):
        @ Test for possible subtractions ...
        .if \modulo
        @ ... On the final pass, this may subtract too much from the dividend,
        @ so keep track of which subtractions are done so that we can fix
        @ them up afterwards.
        mov overdone, #0
        cmp dividend, divisor
        blo LSYM(Lover1)
        sub dividend, dividend, divisor
LSYM(Lover1):
        lsr work, divisor, #1
        cmp dividend, work
        blo LSYM(Lover2)
        sub dividend, dividend, work
        mov ip, curbit
        mov work, #1
        ror curbit, work
        orr overdone, curbit
        mov curbit, ip
LSYM(Lover2):
        lsr work, divisor, #2
        cmp dividend, work
        blo LSYM(Lover3)
        sub dividend, dividend, work
        mov ip, curbit
        mov work, #2
        ror curbit, work
        orr overdone, curbit
        mov curbit, ip
LSYM(Lover3):
        lsr work, divisor, #3
        cmp dividend, work
        blo LSYM(Lover4)
        sub dividend, dividend, work
        mov ip, curbit
        mov work, #3
        ror curbit, work
        orr overdone, curbit
        mov curbit, ip
LSYM(Lover4):
        mov ip, curbit
        .else
        @ ... and note which bits are done in the result.  On the final pass,
        @ this may subtract too much from the dividend, but the result will be ok,
        @ since the "bit" will have been shifted out at the bottom.
        cmp dividend, divisor
        blo LSYM(Lover1)
        sub dividend, dividend, divisor
        orr result, result, curbit
LSYM(Lover1):
        lsr work, divisor, #1
        cmp dividend, work
        blo LSYM(Lover2)
        sub dividend, dividend, work
        lsr work, curbit, #1
        orr result, work
LSYM(Lover2):
        lsr work, divisor, #2
        cmp dividend, work
        blo LSYM(Lover3)
        sub dividend, dividend, work
        lsr work, curbit, #2
        orr result, work
LSYM(Lover3):
        lsr work, divisor, #3
        cmp dividend, work
        blo LSYM(Lover4)
        sub dividend, dividend, work
        lsr work, curbit, #3
        orr result, work
LSYM(Lover4):
        .endif

        cmp dividend, #0       @ Early termination?
        beq LSYM(Lover5)
        lsr curbit, #4         @ No, any more bits to do?
        beq LSYM(Lover5)
        lsr divisor, #4
        b LSYM(Loop3)
LSYM(Lover5):
        .if \modulo
        @ Any subtractions that we should not have done will be recorded in
        @ the top three bits of "overdone".  Exactly which ones were not
        @ needed is determined by the position of the bit, stored in ip.
        mov work, #0xe
        lsl work, #28
        and overdone, work
        beq LSYM(Lgot_result)

        @ If we terminated early, because dividend became zero, then the
        @ bit in ip will not be in the bottom nibble, and we should not
        @ perform the additions below.  We must test for this though
        @ (rather than relying upon the TSTs to prevent the additions) since
        @ the bit in ip could be in the top two bits which might then match
        @ with one of the smaller RORs.
        mov curbit, ip
        mov work, #0x7
        tst curbit, work
        beq LSYM(Lgot_result)

        mov curbit, ip
        mov work, #3
        ror curbit, work
        tst overdone, curbit
        beq LSYM(Lover6)
        lsr work, divisor, #3
        add dividend, work
LSYM(Lover6):
        mov curbit, ip
        mov work, #2
        ror curbit, work
        tst overdone, curbit
        beq LSYM(Lover7)
        lsr work, divisor, #2
        add dividend, work
LSYM(Lover7):
        mov curbit, ip
        mov work, #1
        ror curbit, work
        tst overdone, curbit
        beq LSYM(Lgot_result)
        lsr work, divisor, #1
        add dividend, work
        .endif
LSYM(Lgot_result):
.endm
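/* Why the 0xe0000000 mask in the fix-up above works: a rotated copy of
   curbit can only land in bits 29..31 of "overdone" when curbit itself is
   already down in the bottom bits, i.e. on the final pass, so the mask
   isolates exactly those last-pass subtractions that may have removed too
   much and need to be added back.  */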
/* ------------------------------------------------------------------------ */
/* Start of the Real Functions  */
/* ------------------------------------------------------------------------ */
#ifdef L_udivsi3

        FUNC_START udivsi3
        FUNC_ALIAS aeabi_uidiv udivsi3

#ifdef __thumb__

        cmp divisor, #0
        beq LSYM(Ldiv0)
        mov curbit, #1
        mov result, #0

        push { work }
        cmp dividend, divisor
        blo LSYM(Lgot_result)

        THUMB_DIV_MOD_BODY 0

        mov r0, result
        pop { work }
        RET

#else /* ARM version.  */

        subs r2, r1, #1
        RETc(eq)
        bcc LSYM(Ldiv0)
        cmp r0, r1
        bls 11f
        tst r1, r2
        beq 12f

        ARM_DIV_BODY r0, r1, r2, r3

        mov r0, r2
        RET

11:     moveq r0, #1
        movne r0, #0
        RET

12:     ARM_DIV2_ORDER r1, r2

        mov r0, r0, lsr r2
        RET

#endif /* ARM version */

        DIV_FUNC_END udivsi3

FUNC_START aeabi_uidivmod
#ifdef __thumb__
        push {r0, r1, lr}
        bl SYM(__udivsi3)
        POP {r1, r2, r3}
        mul r2, r0
        sub r1, r1, r2
        bx r3
#else
        stmfd sp!, { r0, r1, lr }
        bl SYM(__udivsi3)
        ldmfd sp!, { r1, r2, lr }
        mul r3, r2, r0
        sub r1, r1, r3
        RET
#endif
        FUNC_END aeabi_uidivmod
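/* __aeabi_uidivmod returns the quotient in r0 and the remainder in r1;
   for example a call with r0 = 23 and r1 = 5 comes back with r0 = 4 and
   r1 = 23 - 4 * 5 = 3.  */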
#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

        FUNC_START umodsi3

#ifdef __thumb__

        cmp divisor, #0
        beq LSYM(Ldiv0)
        mov curbit, #1
        cmp dividend, divisor
        bhs LSYM(Lover10)
        RET

LSYM(Lover10):
        push { work }

        THUMB_DIV_MOD_BODY 1

        pop { work }
        RET

#else /* ARM version.  */

        subs r2, r1, #1        @ compare divisor with 1
        bcc LSYM(Ldiv0)
        cmpne r0, r1           @ compare dividend with divisor
        moveq r0, #0
        tsthi r1, r2           @ see if divisor is power of 2
        andeq r0, r0, r2
        RETc(ls)

        ARM_MOD_BODY r0, r1, r2, r3

        RET

#endif /* ARM version.  */

        DIV_FUNC_END umodsi3

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3

        FUNC_START divsi3
        FUNC_ALIAS aeabi_idiv divsi3

#ifdef __thumb__
        cmp divisor, #0
        beq LSYM(Ldiv0)

        push { work }
        mov work, dividend
        eor work, divisor      @ Save the sign of the result.
        mov ip, work
        mov curbit, #1
        mov result, #0
        cmp divisor, #0
        bpl LSYM(Lover10)
        neg divisor, divisor   @ Loops below use unsigned.
LSYM(Lover10):
        cmp dividend, #0
        bpl LSYM(Lover11)
        neg dividend, dividend
LSYM(Lover11):
        cmp dividend, divisor
        blo LSYM(Lgot_result)

        THUMB_DIV_MOD_BODY 0

        mov r0, result
        mov work, ip
        cmp work, #0
        bpl LSYM(Lover12)
        neg r0, r0
LSYM(Lover12):
        pop { work }
        RET

#else /* ARM version.  */

        cmp r1, #0
        eor ip, r0, r1         @ save the sign of the result.
        beq LSYM(Ldiv0)
        rsbmi r1, r1, #0       @ loops below use unsigned.
        subs r2, r1, #1        @ division by 1 or -1 ?
        beq 10f
        movs r3, r0
        rsbmi r3, r0, #0       @ positive dividend value
        cmp r3, r1
        bls 11f
        tst r1, r2             @ divisor is power of 2 ?
        beq 12f

        ARM_DIV_BODY r3, r1, r0, r2

        cmp ip, #0
        rsbmi r0, r0, #0
        RET

10:     teq ip, r0             @ same sign ?
        rsbmi r0, r0, #0
        RET

11:     movlo r0, #0
        moveq r0, ip, asr #31
        orreq r0, r0, #1
        RET

12:     ARM_DIV2_ORDER r1, r2

        cmp ip, #0
        mov r0, r3, lsr r2
        rsbmi r0, r0, #0
        RET

#endif /* ARM version */

        DIV_FUNC_END divsi3

FUNC_START aeabi_idivmod
#ifdef __thumb__
        push {r0, r1, lr}
        bl SYM(__divsi3)
        POP {r1, r2, r3}
        mul r2, r0
        sub r1, r1, r2
        bx r3
#else
        stmfd sp!, { r0, r1, lr }
        bl SYM(__divsi3)
        ldmfd sp!, { r1, r2, lr }
        mul r3, r2, r0
        sub r1, r1, r3
        RET
#endif
        FUNC_END aeabi_idivmod
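/* __aeabi_idivmod likewise returns the quotient in r0 and the remainder
   in r1, with the quotient truncated towards zero; e.g. dividing -7 by 3
   gives r0 = -2 and r1 = -7 - (-2 * 3) = -1.  */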
#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

        FUNC_START modsi3

#ifdef __thumb__

        mov curbit, #1
        cmp divisor, #0
        beq LSYM(Ldiv0)
        bpl LSYM(Lover10)
        neg divisor, divisor   @ Loops below use unsigned.
LSYM(Lover10):
        push { work }
        @ Need to save the sign of the dividend, but unfortunately we need
        @ the work register later on.  Must do this after saving the original
        @ value of the work register, because we will pop this value off first.
        push { dividend }
        cmp dividend, #0
        bpl LSYM(Lover11)
        neg dividend, dividend
LSYM(Lover11):
        cmp dividend, divisor
        blo LSYM(Lgot_result)

        THUMB_DIV_MOD_BODY 1

        pop { work }
        cmp work, #0
        bpl LSYM(Lover12)
        neg dividend, dividend
LSYM(Lover12):
        pop { work }
        RET

#else /* ARM version.  */

        cmp r1, #0
        beq LSYM(Ldiv0)
        rsbmi r1, r1, #0       @ loops below use unsigned.
        movs ip, r0            @ preserve sign of dividend
        rsbmi r0, r0, #0       @ if negative make positive
        subs r2, r1, #1        @ compare divisor with 1
        cmpne r0, r1           @ compare dividend with divisor
        moveq r0, #0
        tsthi r1, r2           @ see if divisor is power of 2
        andeq r0, r0, r2
        bls 10f

        ARM_MOD_BODY r0, r1, r2, r3

10:     cmp ip, #0
        rsbmi r0, r0, #0
        RET

#endif /* ARM version */

        DIV_FUNC_END modsi3

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

        FUNC_START div0
        FUNC_ALIAS aeabi_idiv0 div0
        FUNC_ALIAS aeabi_ldiv0 div0

        RET

        FUNC_END aeabi_ldiv0
        FUNC_END aeabi_idiv0
        FUNC_END div0
#endif /* L_dvmd_tls */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls

/* Constant taken from <asm/signal.h>.  */
#define SIGFPE 8

        .code 32
        FUNC_START div0

        stmfd sp!, {r1, lr}
        mov r0, #SIGFPE
        bl SYM(raise) __PLT__
        RETLDM r1

        FUNC_END div0

#endif /* L_dvmd_lnx */
/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
        shft xxx, Reg
   is in fact done as
        shft xxx, (Reg & 255)
   so for Reg values in (32...63) and (-1...-31) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */
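/* For example, a 64-bit logical right shift by 40 in the ARM code below
   computes r3 = 40 - 32 = 8, so the low word becomes ah >> 8, while
   `mov ah, ah, lsr r2' uses 40 & 255 = 40 and clears the high word,
   exactly the behaviour described above.  */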
#ifdef __ARMEB__
#define al r1
#define ah r0
#else
#define al r0
#define ah r1
#endif

/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
#ifndef __symbian__
#ifdef L_lshrdi3

        FUNC_START lshrdi3
        FUNC_ALIAS aeabi_llsr lshrdi3

#ifdef __thumb__
        lsr al, r2
        mov r3, ah
        lsr ah, r2
        mov ip, r3
        sub r2, #32
        lsr r3, r2
        orr al, r3
        neg r2, r2
        mov r3, ip
        lsl r3, r2
        orr al, r3
        RET
#else
        subs r3, r2, #32
        rsb ip, r2, #32
        movmi al, al, lsr r2
        movpl al, ah, lsr r3
        orrmi al, al, ah, lsl ip
        mov ah, ah, lsr r2
        RET
#endif
        FUNC_END aeabi_llsr
        FUNC_END lshrdi3

#endif
#ifdef L_ashrdi3

        FUNC_START ashrdi3
        FUNC_ALIAS aeabi_lasr ashrdi3

#ifdef __thumb__
        lsr al, r2
        mov r3, ah
        asr ah, r2
        sub r2, #32
        @ If r2 is negative at this point the following step would OR
        @ the sign bit into all of AL.  That's not what we want...
        bmi 1f
        mov ip, r3
        asr r3, r2
        orr al, r3
        mov r3, ip
1:
        neg r2, r2
        lsl r3, r2
        orr al, r3
        RET
#else
        subs r3, r2, #32
        rsb ip, r2, #32
        movmi al, al, lsr r2
        movpl al, ah, asr r3
        orrmi al, al, ah, lsl ip
        mov ah, ah, asr r2
        RET
#endif

        FUNC_END aeabi_lasr
        FUNC_END ashrdi3

#endif
#ifdef L_ashldi3

        FUNC_START ashldi3
        FUNC_ALIAS aeabi_llsl ashldi3

#ifdef __thumb__
        lsl ah, r2
        mov r3, al
        lsl al, r2
        mov ip, r3
        sub r2, #32
        lsl r3, r2
        orr ah, r3
        neg r2, r2
        mov r3, ip
        lsr r3, r2
        orr ah, r3
        RET
#else
        subs r3, r2, #32
        rsb ip, r2, #32
        movmi ah, ah, lsl r2
        movpl ah, al, lsl r3
        orrmi ah, ah, al, lsr ip
        mov al, al, lsl r2
        RET
#endif
        FUNC_END aeabi_llsl
        FUNC_END ashldi3

#endif

#endif /* __symbian__ */
/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
        || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
        || __ARM_ARCH__ >= 6
#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of the function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */
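/* A typical Thumb-mode call through one of these veneers might look like
   this (illustrative only; the exact symbol depends on
   __USER_LABEL_PREFIX__):

        ldr     r3, =some_arm_function
        bl      _call_via_r3

   The BL sets LR (with the Thumb bit), and the veneer's `bx r3' enters
   the callee in whichever instruction set bit 0 of r3 selects.  */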
        .text
        .align 0
        .force_thumb

.macro call_via register
        THUMB_FUNC_START _call_via_\register

        bx \register
        nop

        SIZE (_call_via_\register)
.endm
        call_via r0
        call_via r1
        call_via r2
        call_via r3
        call_via r4
        call_via r5
        call_via r6
        call_via r7
        call_via r8
        call_via r9
        call_via sl
        call_via fp
        call_via ip
        call_via sp
        call_via lr

#endif /* L_call_via_rX */

/* Don't bother with the old interworking routines for Thumb-2.  */
/* ??? Maybe only omit these on v7m.  */
#ifndef __thumb2__

#if defined L_interwork_call_via_rX
/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of the function to be called is loaded into a register and then one of
   these labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.

   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */
        .text
        .align 0

        .code 32
        .globl _arm_return
LSYM(Lstart_arm_return):
        cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
        cfi_push 0, 0xe, -0x8, 0x8
        nop    @ This nop is for the benefit of debuggers, so that
               @ backtraces will use the correct unwind information.
_arm_return:
        RETLDM unwind=LSYM(Lstart_arm_return)
        cfi_end LSYM(Lend_arm_return)

        .globl _arm_return_r7
_arm_return_r7:
        ldr lr, [r7, #-4]
        bx lr

        .globl _arm_return_r11
_arm_return_r11:
        ldr lr, [r11, #-4]
        bx lr
.macro interwork_with_frame frame, register, name, return
        .code 16

        THUMB_FUNC_START \name

        bx pc
        nop

        .code 32
        tst \register, #1
        streq lr, [\frame, #-4]
        adreq lr, _arm_return_\frame
        bx \register

        SIZE (\name)
.endm
.macro interwork register
        .code 16

        THUMB_FUNC_START _interwork_call_via_\register

        bx pc
        nop

        .code 32
        .globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
        tst \register, #1
        streq lr, [sp, #-8]!
        adreq lr, _arm_return
        bx \register

        SIZE (_interwork_call_via_\register)

        interwork_with_frame r7,\register,_interwork_r7_call_via_\register
        interwork_with_frame r11,\register,_interwork_r11_call_via_\register
.endm
        interwork r0
        interwork r1
        interwork r2
        interwork r3
        interwork r4
        interwork r5
        interwork r6
        interwork r7
        interwork r8
        interwork r9
        interwork sl
        interwork fp
        interwork ip
        interwork sp

        /* The LR case has to be handled a little differently...  */
        .code 16

        THUMB_FUNC_START _interwork_call_via_lr
        bx pc
        nop

        .code 32
        .globl .Lchange_lr
.Lchange_lr:
        tst lr, #1
        stmeqdb r13!, {lr, pc}
        mov ip, lr
        adreq lr, _arm_return
        bx ip

        SIZE (_interwork_call_via_lr)
#endif /* L_interwork_call_via_rX */
#endif /* !__thumb2__ */
#endif /* Arch supports thumb.  */

#ifndef __symbian__
#include "ieee754-df.S"
#include "ieee754-sf.S"
#include "bpabi.S"
#endif /* __symbian__ */