@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
/* Copyright (C) 1995-2024 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
/* Everything in this file should now use unified syntax.  */

	.syntax unified

/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
	.section .note.GNU-stack,"",%progbits
	.previous
#endif /* __ELF__ and __linux__ */
#ifdef __ARM_EABI__
/* Some attributes that are common to all routines in this file.  */
	/* Tag_ABI_align_needed: This code does not require 8-byte
	   alignment from the caller.  */
	/* .eabi_attribute 24, 0 -- default setting.  */
	/* Tag_ABI_align_preserved: This code preserves 8-byte
	   alignment in any callee.  */
	.eabi_attribute 25, 1
#endif /* __ARM_EABI__ */
/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
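/* For example, with an empty __USER_LABEL_PREFIX__ (the usual ELF
   case), SYM (__udivsi3) expands to the label __udivsi3, while a
   target whose prefix is _ would get ___udivsi3.  The CONCAT1/CONCAT2
   indirection forces the prefix macro to be expanded before the token
   paste happens.  */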
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#elif defined __vxworks && !defined __PIC__
#define __PLT__ /* Not supported by the kernel loader.  */
#else
#define __PLT__ (PLT)
#endif

#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
/* Function end macros.  Variants for interworking.  */

/* There are times when we might prefer Thumb1 code even if ARM code is
   permitted, for example, the code might be smaller, or there might be
   interworking problems with switching to ARM state if interworking is
   active.  */
#if (defined(__thumb__)			\
     && !defined(__thumb2__)		\
     && (!defined(__THUMB_INTERWORK__)	\
	 || defined (__OPTIMIZE_SIZE__)	\
	 || !__ARM_ARCH_ISA_ARM))
# define __prefer_thumb__
#endif

#if !__ARM_ARCH_ISA_ARM && __ARM_ARCH_ISA_THUMB == 1
#define NOT_ISA_TARGET_32BIT 1
#endif
/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH > 4) || defined(__ARM_ARCH_4T__)

# define RET		bx	lr
# define RETc(x)	bx##x	lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

#else

# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif
.macro	cfi_pop		advance, reg, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm

.macro	cfi_push	advance, reg, offset, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm

.macro	cfi_start	start_label, end_label
#ifdef __ELF__
	.pushsection	.debug_frame
LSYM(Lstart_frame):
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
	.4byte	0xffffffff	@ CIE Identifier Tag
	.byte	0x1	@ CIE Version
	.ascii	"\0"	@ CIE Augmentation
	.uleb128 0x1	@ CIE Code Alignment Factor
	.sleb128 -4	@ CIE Data Alignment Factor
	.byte	0xe	@ CIE RA Column
	.byte	0xc	@ DW_CFA_def_cfa
	.uleb128 0xd
	.uleb128 0x0
	.align 2
LSYM(Lend_cie):
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
LSYM(Lstart_fde):
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label	@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range
	.popsection
#endif
.endm

.macro	cfi_end	end_label
#ifdef __ELF__
	.pushsection	.debug_frame
	.align	2
LSYM(Lend_fde):
	.popsection
\end_label:
#endif
.endm
/* Don't pass dirn, it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, lr}
# else
	ldm\cond\dirn	sp!, {\regs, lr}
# endif
	.endif
	.ifnc "\unwind", ""
	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0
	.endif
	bx\cond	lr
#else
	/* Caller is responsible for providing IT instruction.  */
	.ifc "\regs",""
	ldr\cond	pc, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, pc}
# else
	ldm\cond\dirn	sp!, {\regs, pc}
# endif
	.endif
#endif
.endm
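/* As an illustration of RETLDM: in ARM mode without interworking,
	RETLDM	r4
   becomes
	ldmia	sp!, {r4, pc}
   whereas with __INTERWORKING__ it restores into lr and returns with
   bx so the caller's instruction set is re-entered correctly:
	ldmia	sp!, {r4, lr}
	bx	lr  */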
/* The Unified assembly syntax allows the same code to be assembled for both
   ARM and Thumb-2.  However this is only supported by recent gas, so define
   a set of macros to allow ARM code on older assemblers.  */
#if defined(__thumb2__)
.macro	do_it cond, suffix=""
	it\suffix	\cond
.endm
.macro	shift1 op, arg0, arg1, arg2
	\op	\arg0, \arg1, \arg2
.endm
#define do_push	push
#define do_pop	pop
/* Perform an arithmetic operation with a variable shift operand.  This
   requires two instructions and a scratch register on Thumb-2.  */
.macro	shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\shiftop \tmp, \src2, \shiftreg
	\name \dest, \src1, \tmp
.endm
#else
.macro	do_it cond, suffix=""
.endm
.macro	shift1 op, arg0, arg1, arg2
	mov	\arg0, \arg1, \op \arg2
.endm
#define do_push	stmfd sp!,
#define do_pop	ldmfd sp!,
.macro	shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\name \dest, \src1, \src2, \shiftop \shiftreg
.endm
#endif

#define COND(op1, op2, cond) op1 ## op2 ## cond
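/* As an illustration of shiftop: on Thumb-2
	shiftop orr, r0, r0, r3, lsl, r2, ip
   expands to the two-instruction sequence
	lsl	ip, r3, r2
	orr	r0, r0, ip
   while the ARM-mode variant folds the shift into a single
   data-processing instruction:
	orr	r0, r0, r3, lsl r2  */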
#ifdef __ARM_EABI__
.macro	ARM_LDIV0 name signed
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM (__aeabi_idiv0) __PLT__
.endm
#else
.macro	ARM_LDIV0 name signed
	str	lr, [sp, #-8]!
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM	unwind=98b
.endm
#endif
#ifdef __ARM_EABI__
.macro	THUMB_LDIV0 name signed
#ifdef NOT_ISA_TARGET_32BIT

	push	{r0, lr}
	movs	r0, #0
	bl	SYM(__aeabi_idiv0)
	@ We know we are not on armv4t, so pop pc is safe.
	pop	{r1, pc}

#elif defined(__thumb2__)
	cmp	r0, #0
	.ifc	\signed, unsigned
	do_it	ne
	movne	r0, #0xffffffff
	.else
	do_it	gt
	movgt	r0, #0x7fffffff
	do_it	lt
	movlt	r0, #0x80000000
	.endif
	b.w	SYM(__aeabi_idiv0) __PLT__
#else
	.align	2
	bx	pc
	nop
	.arm
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM(__aeabi_idiv0) __PLT__
	.thumb
#endif
.endm
#else
.macro	THUMB_LDIV0 name signed
	push	{ r1, lr }
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	bl	SYM (__div0)
	movs	r0, #0		@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1, r2 }
	bx	r2
#else
	pop	{ r1, pc }
#endif
.endm
#endif
.macro DIV_FUNC_END name signed
	cfi_start	__\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0 \name \signed
#else
	ARM_LDIV0 \name \signed
#endif
	cfi_end	LSYM(Lend_div0)
	FUNC_END \name
.endm
.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm
/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
# if defined(__thumb2__)
#define THUMB_SYNTAX
# else
#define THUMB_SYNTAX
# endif
#else
#define THUMB_FUNC
#define THUMB_CODE
#define THUMB_SYNTAX
#endif

.macro FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
	THUMB_SYNTAX
SYM (__\name):
.endm

.macro ARM_SYM_START name
	TYPE (\name)
	.align 0
SYM (\name):
.endm

#define SYM_END(name)		SIZE (name)
#define ARM_SYM_END(name)	SIZE (name)
/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */

#if defined(__thumb2__)

/* For Thumb-2 we build everything in thumb mode.  */
.macro ARM_FUNC_START name
	FUNC_START \name
	.syntax unified
.endm
#define EQUIV .thumb_set
.macro	ARM_CALL name
	bl	__\name
.endm

#elif defined(__INTERWORKING_STUBS__)

.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro	ARM_CALL name
	bl	_L__\name
.endm

#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */

#ifdef NOT_ISA_TARGET_32BIT
#define EQUIV .thumb_set
#else
.macro	ARM_FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro	ARM_CALL name
	bl	__\name
.endm
#endif

#endif
.macro	FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm

#ifndef NOT_ISA_TARGET_32BIT
.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm
#endif

.macro	FUNC_END name
	SIZE (__\name)
.endm

#ifdef __ARMEB__
xxh		.req	r0
xxl		.req	r1
#else
xxh		.req	r1
xxl		.req	r0
#endif
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r0
curbit		.req	r3
/* ------------------------------------------------------------------------ */
/* Bodies of the division and modulo routines.  */
/* ------------------------------------------------------------------------ */
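/* In outline, the division bodies below implement classic restoring
   (shift-and-subtract) division.  A C sketch of the idea, ignoring
   the 4-bit unrolling, early-termination and CLZ tricks used in the
   assembly (illustrative only; assumes divisor != 0, which the
   callers have already checked):

	unsigned udiv (unsigned dividend, unsigned divisor)
	{
	  unsigned curbit = 1, result = 0;
	  // Align the divisor with the dividend's leading bit.
	  while (divisor < dividend && !(divisor & 0x80000000))
	    { divisor <<= 1; curbit <<= 1; }
	  // Walk back down, subtracting out each bit's contribution.
	  while (curbit)
	    {
	      if (dividend >= divisor)
		{ dividend -= divisor; result |= curbit; }
	      divisor >>= 1; curbit >>= 1;
	    }
	  return result;	// 'dividend' now holds the remainder.
	}
   */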
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if defined (__ARM_FEATURE_CLZ) && ! defined (__OPTIMIZE_SIZE__)

#if defined (__thumb2__)
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsb	\curbit, \curbit, #31
	adr	\result, 1f
	add	\curbit, \result, \curbit, lsl #4
	mov	\result, #0
	mov	pc, \curbit
.p2align 3
1:
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp.w	\dividend, \divisor, lsl #shift
	nop.n
	adc.w	\result, \result, \result
	it	cs
	subcs.w	\dividend, \dividend, \divisor, lsl #shift
	.endr
#else
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	mov	\result, #0
	addne	pc, pc, \curbit, lsl #2
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr
#endif
#else /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
#if defined (__ARM_FEATURE_CLZ)

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else /* !defined (__ARM_FEATURE_CLZ) */

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibble in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif /* !defined (__ARM_FEATURE_CLZ) */
	@ Division loop.
1:	cmp	\dividend, \divisor
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	do_it	ne, t
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

#endif /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */

.endm
/* ------------------------------------------------------------------------ */
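/* ARM_DIV2_ORDER computes the base-2 logarithm of a divisor already
   known to be a power of two, i.e. the shift count for the quotient.
   Roughly, in C (hypothetical helper for illustration only):

	int div2_order (unsigned divisor)
	{
	  return 31 - __builtin_clz (divisor);	// the CLZ path below
	}

   The non-CLZ path reaches the same result with a branchless binary
   search over 16-, 8-, 4- and 2-bit halves.  */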
.macro ARM_DIV2_ORDER divisor, order

#if defined (__ARM_FEATURE_CLZ)

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movlo	\order, #0
	movhs	\order, #16

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
/* ------------------------------------------------------------------------ */
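/* ARM_MOD_BODY reduces the dividend to dividend % divisor in place.
   It mirrors the division body, but keeps only the remainder: align
   the divisor with the dividend, then subtract it back out at
   successively smaller shifts.  A C sketch of the idea (illustrative
   only; order_of () stands for 31 - clz, so both values are assumed
   non-zero here):

	unsigned umod (unsigned dividend, unsigned divisor)
	{
	  int order = order_of (dividend) - order_of (divisor);
	  while (order >= 0)
	    {
	      if (dividend >= (divisor << order))
		dividend -= divisor << order;
	      order--;
	    }
	  return dividend;
	}
   */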
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if defined(__ARM_FEATURE_CLZ) && ! defined (__OPTIMIZE_SIZE__)

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
#if defined (__ARM_FEATURE_CLZ)

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* !defined (__ARM_FEATURE_CLZ) */

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif /* !defined (__ARM_FEATURE_CLZ) */

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:

#endif /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */

.endm
/* ------------------------------------------------------------------------ */
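/* THUMB_DIV_MOD_BODY is shared by the Thumb-1 divide and modulo
   routines; \modulo selects which result is produced.  Thumb-1 has no
   conditional execution, so the unrolled compare/subtract steps may
   subtract at a shift that the final iteration should have skipped.
   The modulo variant therefore records each such step as a rotated
   copy of curbit in "overdone" and, at the end, adds the
   over-subtracted amounts back.  Conceptually (C sketch, with ror ()
   a 32-bit rotate right and "bit" the final curbit position;
   illustrative only):

	if (overdone & ror (bit, 3)) dividend += divisor >> 3;
	if (overdone & ror (bit, 2)) dividend += divisor >> 2;
	if (overdone & ror (bit, 1)) dividend += divisor >> 1;
   */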
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	movs	work, #1
	lsls	work, #28
LSYM(Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM(Lbignum)
	cmp	divisor, dividend
	bhs	LSYM(Lbignum)
	lsls	divisor, #4
	lsls	curbit,  #4
	b	LSYM(Loop1)
LSYM(Lbignum):
	@ Set work to 0x80000000
	lsls	work, #3
LSYM(Loop2):
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM(Loop3)
	cmp	divisor, dividend
	bhs	LSYM(Loop3)
	lsls	divisor, #1
	lsls	curbit,  #1
	b	LSYM(Loop2)
LSYM(Loop3):
	@ Test for possible subtractions ...
  .if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done, we can fix them up
	@ afterwards.
	movs	overdone, #0
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	subs	dividend, dividend, divisor
LSYM(Lover1):
	lsrs	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	subs	dividend, dividend, work
	mov	ip, curbit
	movs	work, #1
	rors	curbit, work
	orrs	overdone, curbit
	mov	curbit, ip
LSYM(Lover2):
	lsrs	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	subs	dividend, dividend, work
	mov	ip, curbit
	movs	work, #2
	rors	curbit, work
	orrs	overdone, curbit
	mov	curbit, ip
LSYM(Lover3):
	lsrs	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	subs	dividend, dividend, work
	mov	ip, curbit
	movs	work, #3
	rors	curbit, work
	orrs	overdone, curbit
	mov	curbit, ip
LSYM(Lover4):
	mov	ip, curbit
  .else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be ok,
	@ since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	subs	dividend, dividend, divisor
	orrs	result, result, curbit
LSYM(Lover1):
	lsrs	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	subs	dividend, dividend, work
	lsrs	work, curbit, #1
	orrs	result, work
LSYM(Lover2):
	lsrs	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	subs	dividend, dividend, work
	lsrs	work, curbit, #2
	orrs	result, work
LSYM(Lover3):
	lsrs	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	subs	dividend, dividend, work
	lsrs	work, curbit, #3
	orrs	result, work
LSYM(Lover4):
  .endif

	cmp	dividend, #0			@ Early termination?
	beq	LSYM(Lover5)
	lsrs	curbit,  #4			@ No, any more bits to do?
	beq	LSYM(Lover5)
	lsrs	divisor, #4
	b	LSYM(Loop3)
LSYM(Lover5):
  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	movs	work, #0xe
	lsls	work, #28
	ands	overdone, work
	beq	LSYM(Lgot_result)

	@ If we terminated early, because dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather than relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits which might then match
	@ with one of the smaller RORs.
	mov	curbit, ip
	movs	work, #0x7
	tst	curbit, work
	beq	LSYM(Lgot_result)

	mov	curbit, ip
	movs	work, #3
	rors	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover6)
	lsrs	work, divisor, #3
	adds	dividend, work
LSYM(Lover6):
	mov	curbit, ip
	movs	work, #2
	rors	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover7)
	lsrs	work, divisor, #2
	adds	dividend, work
LSYM(Lover7):
	mov	curbit, ip
	movs	work, #1
	rors	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lgot_result)
	lsrs	work, divisor, #1
	adds	dividend, work
  .endif
LSYM(Lgot_result):
.endm
/* If performance is preferred, the following functions are provided.  */
#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)

/* Branch to div(n), and jump to \label if curbit is lower than divisor.  */
.macro BranchToDiv n, label
	lsrs	curbit, dividend, \n
	cmp	curbit, divisor
	blo	\label
.endm

/* Body of div(n).  Shift the divisor left n bits and compare it with the
   dividend.  Update the dividend with the subtraction result.  */
.macro DoDiv n
	lsrs	curbit, dividend, \n
	cmp	curbit, divisor
	blo	1f
	lsls	curbit, divisor, \n
	subs	dividend, dividend, curbit

1:	adcs	result, result
.endm

/* The body of division with positive divisor.  Unless the divisor is very
   big, shift it up in multiples of four bits, since this is the amount of
   unwinding in the main division loop.  Continue shifting until the divisor
   is larger than the dividend.  */
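/* The BranchToDiv probes used below let the routine enter the
   unrolled chain at roughly the right order of magnitude: if
   dividend >> n is already below the divisor, no quotient bit at or
   above bit n can be set.  In C terms (sketch only):

	if (dividend >> 1 < divisor)	// quotient fits in 1 bit
	  goto div1;
	if (dividend >> 4 < divisor)	// quotient fits in 4 bits
	  goto div4;
	// ... otherwise probe 8, 12, 16, or scale the divisor up.
   */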
.macro THUMB1_Div_Positive
	movs	result, #0
	BranchToDiv #1, LSYM(Lthumb1_div1)
	BranchToDiv #4, LSYM(Lthumb1_div4)
	BranchToDiv #8, LSYM(Lthumb1_div8)
	BranchToDiv #12, LSYM(Lthumb1_div12)
	BranchToDiv #16, LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_large_positive):
	lsls	divisor, divisor, #8
	lsrs	curbit, dividend, #16
	cmp	curbit, divisor
	blo	1f
	lsls	divisor, divisor, #8
	beq	LSYM(Ldivbyzero_waypoint)

1:	lsrs	curbit, dividend, #12
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div12)
	b	LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_loop):
	lsrs	divisor, divisor, #8
LSYM(Lthumb1_div16):
	DoDiv	#15
	DoDiv	#14
	DoDiv	#13
	DoDiv	#12
LSYM(Lthumb1_div12):
	DoDiv	#11
	DoDiv	#10
	DoDiv	#9
	DoDiv	#8
	bcs	LSYM(Lthumb1_div_loop)
LSYM(Lthumb1_div8):
	DoDiv	#7
	DoDiv	#6
	DoDiv	#5
	DoDiv	#4
LSYM(Lthumb1_div4):
	DoDiv	#3
	DoDiv	#2
	DoDiv	#1
LSYM(Lthumb1_div1):
	subs	divisor, dividend, divisor
	bcs	1f
	cpy	divisor, dividend

1:	adcs	result, result
	cpy	dividend, result
	RET
LSYM(Ldivbyzero_waypoint):
	b	LSYM(Ldiv0)
.endm
/* The body of division with negative divisor.  Similar to
   THUMB1_Div_Positive, except that the shift steps are in multiples
   of six bits.  */
.macro THUMB1_Div_Negative
	lsrs	result, divisor, #31
	beq	1f
	negs	divisor, divisor

1:	asrs	curbit, dividend, #32
	bcc	2f
	negs	dividend, dividend

2:	eors	curbit, result
	movs	result, #0
	BranchToDiv #4, LSYM(Lthumb1_div_negative4)
	BranchToDiv #8, LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_large):
	lsls	divisor, divisor, #6
	lsrs	curbit, dividend, #8
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div_negative8)
	lsls	divisor, divisor, #6
	asrs	result, result, #6
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div_negative8)
	lsls	divisor, divisor, #6
	asrs	result, result, #6
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div_negative8)
	lsls	divisor, divisor, #6
	beq	LSYM(Ldivbyzero_negative)
	asrs	result, result, #6
	b	LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_negative_loop):
	lsrs	divisor, divisor, #6
LSYM(Lthumb1_div_negative8):
LSYM(Lthumb1_div_negative4):
	bcs	LSYM(Lthumb1_div_negative_loop)
	subs	divisor, dividend, divisor
	cpy	divisor, dividend
	asrs	curbit, curbit, #1
	cpy	dividend, result
	negs	dividend, dividend
	negs	divisor, divisor
LSYM(Ldivbyzero_negative):
	asrs	curbit, curbit, #1
	negs	dividend, dividend
.endm
#endif /* ARM Thumb version.  */
/* ------------------------------------------------------------------------ */
/*		Start of the Real Functions				    */
/* ------------------------------------------------------------------------ */
#ifdef L_udivsi3

#if defined(__prefer_thumb__)

	FUNC_START udivsi3
	FUNC_ALIAS aeabi_uidiv udivsi3
#if defined(__OPTIMIZE_SIZE__)

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
LSYM(udivsi3_skip_div0_test):
	movs	curbit, #1
	movs	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	movs	r0, result
	pop	{ work }
	RET

#else
/* Implementation of aeabi_uidiv for Armv6-M.  This version is only
   used in Armv6-M when we need an efficient implementation.  */
LSYM(udivsi3_skip_div0_test):
	THUMB1_Div_Positive

#endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)

	udiv	r0, r0, r1
	RET

#else /* ARM version/Thumb-2.  */

	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	/* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
	   check for division-by-zero a second time.  */
LSYM(udivsi3_skip_div0_test):
	subs	r2, r1, #1
	do_it	eq
	RETc(eq)
	bcc	LSYM(Ldiv0)
	cmp	r0, r1
	bls	11f
	tst	r1, r2
	beq	12f

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	RET

11:	do_it	eq, e
	moveq	r0, #1
	movne	r0, #0
	RET

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	RET

#endif /* ARM version */

	DIV_FUNC_END udivsi3 unsigned
#if defined(__prefer_thumb__)
FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
# if defined(__OPTIMIZE_SIZE__)
	push	{r0, r1, lr}
	bl	LSYM(udivsi3_skip_div0_test)
	pop	{r1, r2, r3}
	muls	r2, r0
	subs	r1, r1, r2
	bx	r3
# else
	/* Both the quotient and remainder are calculated simultaneously
	   in THUMB1_Div_Positive.  There is no need to calculate the
	   remainder again here.  */
	b	LSYM(udivsi3_skip_div0_test)
# endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov	r2, r0
	udiv	r0, r0, r1
	mls	r1, r0, r1, r2
	RET
#else
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(udivsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_uidivmod
#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

#if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1

	ARM_FUNC_START umodsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	udiv	r2, r0, r1
	mls	r0, r1, r2, r0
	RET

#elif defined(__thumb__)

	FUNC_START umodsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	movs	curbit, #1
	cmp	dividend, divisor
	bhs	LSYM(Lover10)
	RET

LSYM(Lover10):
	push	{ work }

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	RET

#else /* ARM version.  */

	FUNC_START umodsi3

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	LSYM(Ldiv0)
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	RETc(ls)

	ARM_MOD_BODY r0, r1, r2, r3

	RET

#endif /* ARM version.  */

	DIV_FUNC_END umodsi3 unsigned

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3

#if defined(__prefer_thumb__)

	FUNC_START divsi3
	FUNC_ALIAS aeabi_idiv divsi3
#if defined(__OPTIMIZE_SIZE__)

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
LSYM(divsi3_skip_div0_test):
	push	{ work }
	movs	work, dividend
	eors	work, divisor		@ Save the sign of the result.
	mov	ip, work
	movs	curbit, #1
	movs	result, #0
	cmp	divisor, #0
	bpl	LSYM(Lover10)
	negs	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	negs	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	movs	r0, result
	mov	work, ip
	cmp	work, #0
	bpl	LSYM(Lover12)
	negs	r0, r0
LSYM(Lover12):
	pop	{ work }
	RET

#else
/* Implementation of aeabi_idiv for Armv6-M.  This version is only
   used in Armv6-M when we need an efficient implementation.  */
LSYM(divsi3_skip_div0_test):
	cpy	curbit, dividend
	orrs	curbit, divisor
	bmi	LSYM(Lthumb1_div_negative)

LSYM(Lthumb1_div_positive):
	THUMB1_Div_Positive

LSYM(Lthumb1_div_negative):
	THUMB1_Div_Negative

#endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	sdiv	r0, r0, r1
	RET

#else /* ARM/Thumb-2 version.  */

	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
LSYM(divsi3_skip_div0_test):
	eor	ip, r0, r1			@ save the sign of the result.
	do_it	mi
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	do_it	mi
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	do_it	mi
	rsbmi	r0, r0, #0
	RET

10:	teq	ip, r0				@ same sign ?
	do_it	mi
	rsbmi	r0, r0, #0
	RET

11:	do_it	lo
	movlo	r0, #0
	do_it	eq, t
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	RET

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	do_it	mi
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END divsi3 signed
#if defined(__prefer_thumb__)
FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
# if defined(__OPTIMIZE_SIZE__)
	push	{r0, r1, lr}
	bl	LSYM(divsi3_skip_div0_test)
	pop	{r1, r2, r3}
	muls	r2, r0
	subs	r1, r1, r2
	bx	r3
# else
	/* Both the quotient and remainder are calculated simultaneously
	   in THUMB1_Div_Positive and THUMB1_Div_Negative.  There is no
	   need to calculate the remainder again here.  */
	b	LSYM(divsi3_skip_div0_test)
# endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov	r2, r0
	sdiv	r0, r0, r1
	mls	r1, r0, r1, r2
	RET
#else
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(divsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_idivmod

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

#if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1

	ARM_FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	sdiv	r2, r0, r1
	mls	r0, r1, r2, r0
	RET

#elif defined(__thumb__)

	FUNC_START modsi3

	movs	curbit, #1
	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	bpl	LSYM(Lover10)
	negs	divisor, divisor		@ Loops below use unsigned.
LSYM(Lover10):
	push	{ work }
	@ Need to save the sign of the dividend, unfortunately, we need
	@ work later on.  Must do this after saving the original value of
	@ the work register, because we will pop this value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	negs	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	cmp	work, #0
	bpl	LSYM(Lover12)
	negs	dividend, dividend
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

	FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END modsi3 signed

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

#ifdef __ARM_EABI__
	FUNC_START aeabi_idiv0
	FUNC_START aeabi_ldiv0
	RET
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
#else
	FUNC_START div0
	RET
	FUNC_END div0
#endif

#endif /* L_divmodsi_tools */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls

/* Constant taken from <asm/signal.h>.  */
#define SIGFPE	8

#ifdef __ARM_EABI__
	cfi_start	__aeabi_ldiv0, LSYM(Lend_aeabi_ldiv0)
	ARM_FUNC_START aeabi_idiv0
	ARM_FUNC_START aeabi_ldiv0
	do_push	{r1, lr}
98:	cfi_push 98b - __aeabi_ldiv0, 0xe, -0x4, 0x8
#else
	cfi_start	__div0, LSYM(Lend_div0)
	ARM_FUNC_START div0
	do_push	{r1, lr}
98:	cfi_push 98b - __div0, 0xe, -0x4, 0x8
#endif

	mov	r0, #SIGFPE
	bl	SYM(raise) __PLT__
	RETLDM	r1 unwind=98b

#ifdef __ARM_EABI__
	cfi_end	LSYM(Lend_aeabi_ldiv0)
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
#else
	cfi_end	LSYM(Lend_div0)
	FUNC_END div0
#endif

#endif /* L_dvmd_lnx */
#ifdef L_clear_cache
#if defined __ARM_EABI__ && defined __linux__
@ EABI GNU/Linux call to cacheflush syscall.
	ARM_FUNC_START clear_cache
	do_push	{r7}
#if __ARM_ARCH >= 7 || defined(__ARM_ARCH_6T2__)
	movw	r7, #2
	movt	r7, #0xf
#else
	mov	r7, #0xf0000
	add	r7, r7, #2
#endif
	mov	r2, #0
	swi	0
	do_pop	{r7}
	RET
	FUNC_END clear_cache
#else
#error "This is only for ARM EABI GNU/Linux"
#endif
#endif /* L_clear_cache */
#ifdef L_speculation_barrier
	FUNC_START speculation_barrier
#if __ARM_ARCH >= 7
	dsb	sy
	isb
#elif defined __ARM_EABI__ && defined __linux__
	/* We don't have a speculation barrier directly for this
	   platform/architecture variant.  But we can use a kernel
	   clear_cache service routine which will emit such instructions
	   if run on a later version of the architecture.  We don't
	   really want to flush the cache, but we must give it a valid
	   address, so just clear pc .. pc + 1.  */
#if defined __thumb__ && !defined __thumb2__
#ifdef __ARM_ARCH_6T2__
	movw	r7, #0x2
	movt	r7, #0xf
#else
	movs	r7, #0xf
	lsls	r7, #16
	adds	r7, #2
#endif
	add	r0, pc, #0	/* ADR.  */
#endif /* Thumb1 only */
#else
#warning "No speculation barrier defined for this platform"
#endif
	FUNC_END speculation_barrier
#endif /* L_speculation_barrier */
/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
	shft xxx, Reg
   is in fact done as
	shft xxx, (Reg & 255)
   so for Reg value in (32...63) and (-1...-31) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */
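/* For reference, each 64-bit shift decomposes into 32-bit operations
   as in this C sketch (little-endian register pairing, 0 < n < 64;
   illustrative only):

	unsigned long long lshrdi3 (unsigned long long x, int n)
	{
	  unsigned ah = (unsigned) (x >> 32), al = (unsigned) x;
	  unsigned hi, lo;
	  if (n >= 32)			// high word supplies all bits
	    { lo = ah >> (n - 32); hi = 0; }
	  else				// bits cross the word boundary
	    { lo = (al >> n) | (ah << (32 - n)); hi = ah >> n; }
	  return ((unsigned long long) hi << 32) | lo;
	}

   ashrdi3 and ashldi3 follow the same pattern with asr/lsl, and the
   assembly below merges the two cases without a branch by relying on
   the shift-by-(Reg & 255) behaviour described above.  */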
#ifdef __ARMEB__
#define al	r1
#define ah	r0
#else
#define al	r0
#define ah	r1
#endif

/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
#ifndef __symbian__

#ifdef L_lshrdi3

	FUNC_START lshrdi3
	FUNC_ALIAS aeabi_llsr lshrdi3

	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, lsr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, lsr r2
	RET

	FUNC_END aeabi_llsr
	FUNC_END lshrdi3

#endif

#ifdef L_ashrdi3

	FUNC_START ashrdi3
	FUNC_ALIAS aeabi_lasr ashrdi3

	subs	r3, r2, #32
	@ If r2 is negative at this point the following step would OR
	@ the sign bit into all of AL.  That's not what we want...
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, asr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, asr r2
	RET

	FUNC_END aeabi_lasr
	FUNC_END ashrdi3

#endif

#ifdef L_ashldi3

	FUNC_START ashldi3
	FUNC_ALIAS aeabi_llsl ashldi3

	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	ah, ah, lsl r2
	movpl	ah, al, lsl r3
	orrmi	ah, ah, al, lsr ip
	mov	al, al, lsl r2
	RET

	FUNC_END aeabi_llsl
	FUNC_END ashldi3

#endif

#endif /* __symbian__ */
#ifdef L_clzsi2
#ifdef NOT_ISA_TARGET_32BIT
FUNC_START clzsi2
	movs	r1, #28
	movs	r3, #1
	lsls	r3, r3, #16
	cmp	r0, r3 /* 0x10000 */
	bcc	2f
	lsrs	r0, r0, #16
	subs	r1, r1, #16
2:	lsrs	r3, r3, #8
	cmp	r0, r3 /* #0x100 */
	bcc	2f
	lsrs	r0, r0, #8
	subs	r1, r1, #8
2:	lsrs	r3, r3, #4
	cmp	r0, r3 /* #0x10 */
	bcc	2f
	lsrs	r0, r0, #4
	subs	r1, r1, #4
2:	adr	r2, 1f
	ldrb	r0, [r2, r0]
	adds	r0, r0, r1
	bx	lr
.align 2
1:
.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
	FUNC_END clzsi2
#else
ARM_FUNC_START clzsi2
# if defined (__ARM_FEATURE_CLZ)
	clz	r0, r0
	RET
# else
	mov	r1, #28
	cmp	r0, #0x10000
	do_it	cs, t
	movcs	r0, r0, lsr #16
	subcs	r1, r1, #16
	cmp	r0, #0x100
	do_it	cs, t
	movcs	r0, r0, lsr #8
	subcs	r1, r1, #8
	cmp	r0, #0x10
	do_it	cs, t
	movcs	r0, r0, lsr #4
	subcs	r1, r1, #4
	adr	r2, 1f
	ldrb	r0, [r2, r0]
	add	r0, r0, r1
	RET
.align 2
1:
.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
# endif /* !defined (__ARM_FEATURE_CLZ) */
	FUNC_END clzsi2
#endif
#endif /* L_clzsi2 */
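/* Without a CLZ instruction, the count is derived by a binary search
   plus a 4-bit table lookup, matching the .byte table above.  In C
   (illustrative only):

	static const unsigned char clz_tab[16] =
	  { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };

	int clzsi2 (unsigned x)		// x != 0
	{
	  int n = 0;
	  if (x < 1u << 16) { n += 16; } else { x >>= 16; }
	  if (x < 1u << 8)  { n += 8; }  else { x >>= 8; }
	  if (x < 1u << 4)  { n += 4; }  else { x >>= 4; }
	  return n + clz_tab[x];
	}
   */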
#ifdef L_clzdi2
#if !defined (__ARM_FEATURE_CLZ)

# ifdef NOT_ISA_TARGET_32BIT
FUNC_START clzdi2
# else /* NOT_ISA_TARGET_32BIT */
ARM_FUNC_START clzdi2
# endif /* NOT_ISA_TARGET_32BIT */

#else /* defined (__ARM_FEATURE_CLZ) */

ARM_FUNC_START clzdi2
	cmp	xxh, #0
	do_it	eq, et
	clzeq	r0, xxl
	clzne	r0, xxh
	addeq	r0, r0, #32
	RET
	FUNC_END clzdi2

#endif
#endif /* L_clzdi2 */
#ifdef L_ctzsi2
#ifdef NOT_ISA_TARGET_32BIT
FUNC_START ctzsi2
	negs	r1, r0
	ands	r0, r0, r1
	movs	r1, #28
	movs	r3, #1
	lsls	r3, r3, #16
	cmp	r0, r3 /* 0x10000 */
	bcc	2f
	lsrs	r0, r0, #16
	subs	r1, r1, #16
2:	lsrs	r3, r3, #8
	cmp	r0, r3 /* #0x100 */
	bcc	2f
	lsrs	r0, r0, #8
	subs	r1, r1, #8
2:	lsrs	r3, r3, #4
	cmp	r0, r3 /* #0x10 */
	bcc	2f
	lsrs	r0, r0, #4
	subs	r1, r1, #4
2:	adr	r2, 1f
	ldrb	r0, [r2, r0]
	subs	r0, r0, r1
	bx	lr
.align 2
1:
.byte 27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
	FUNC_END ctzsi2
#else
ARM_FUNC_START ctzsi2
	rsb	r1, r0, #0
	and	r0, r0, r1
# if defined (__ARM_FEATURE_CLZ)
	clz	r0, r0
	rsb	r0, r0, #31
	RET
# else
	mov	r1, #28
	cmp	r0, #0x10000
	do_it	cs, t
	movcs	r0, r0, lsr #16
	subcs	r1, r1, #16
	cmp	r0, #0x100
	do_it	cs, t
	movcs	r0, r0, lsr #8
	subcs	r1, r1, #8
	cmp	r0, #0x10
	do_it	cs, t
	movcs	r0, r0, lsr #4
	subcs	r1, r1, #4
	adr	r2, 1f
	ldrb	r0, [r2, r0]
	sub	r0, r0, r1
	RET
.align 2
1:
.byte 27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
# endif /* !defined (__ARM_FEATURE_CLZ) */
	FUNC_END ctzsi2
#endif
#endif /* L_ctzsi2 */
/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
    || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
    || __ARM_ARCH >= 6

#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */
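/* A typical call sequence, compiled as Thumb code, looks like

	ldr	r3, =target_function	@ target_function is just an example
	bl	_call_via_r3

   which enters target_function in the correct instruction set with
   the return address (bottom bit set for Thumb) already in lr.  */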
	.text
	.align 0
	.force_thumb

.macro call_via register
	THUMB_FUNC_START _call_via_\register

	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */
/* Don't bother with the old interworking routines for Thumb-2.  */
/* ??? Maybe only omit these on "m" variants.  */
#if !defined(__thumb2__) && __ARM_ARCH_ISA_ARM

#if defined L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.

   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */

	.text
	.align 0

	.code	32
	.globl _arm_return
LSYM(Lstart_arm_return):
	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
	cfi_push	0, 0xe, -0x8, 0x8
	nop	@ This nop is for the benefit of debuggers, so that
		@ backtraces will use the correct unwind information.
_arm_return:
	RETLDM	unwind=LSYM(Lstart_arm_return)
	cfi_end	LSYM(Lend_arm_return)
	.globl _arm_return_r7
_arm_return_r7:
	ldr	lr, [r7, #-4]
	bx	lr

	.globl _arm_return_r11
_arm_return_r11:
	ldr	lr, [r11, #-4]
	bx	lr

.macro interwork_with_frame frame, register, name, return
	.code	16

	THUMB_FUNC_START \name

	bx	pc
	nop

	.code	32
	tst	\register, #1
	streq	lr, [\frame, #-4]
	adreq	lr, _arm_return_\frame
	bx	\register

	SIZE	(\name)
.endm

.macro interwork register
	.code	16

	THUMB_FUNC_START _interwork_call_via_\register

	bx	pc
	nop

	.code	32
	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	tst	\register, #1
	streq	lr, [sp, #-8]!
	adreq	lr, _arm_return
	bx	\register

	SIZE	(_interwork_call_via_\register)

	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip

	/* The LR case has to be handled a little differently...  */
	.code	16

	THUMB_FUNC_START _interwork_call_via_lr

	bx	pc
	nop

	.code	32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmdbeq	r13!, {lr, pc}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)
#endif /* L_interwork_call_via_rX */
#endif /* !__thumb2__ */

/* Functions to support compact pic switch tables in thumb1 state.
   All these routines take an index into the table in r0.  The
   table is at LR & ~1 (but this must be rounded up in the case
   of 32-bit entries).  They are only permitted to clobber r12
   and r14; r0 must be preserved on exit.  */
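/* For instance, the compiler lowers a Thumb-1 switch into a call
   followed immediately by the table (sketch of typical output; the
   label names are illustrative):

	bl	__gnu_thumb1_case_sqi
	.byte	(.Lcase0 - .Ltable) / 2
	.byte	(.Lcase1 - .Ltable) / 2

   Each helper below loads the entry indexed by r0 from the table that
   follows the call, scales it back to bytes, and adds it to lr so
   that the return resumes at the selected case.  */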
#ifdef L_thumb1_case_sqi

	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_sqi
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1
	ldrsb	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r1}
	bx	lr
	SIZE (__gnu_thumb1_case_sqi)
#endif

#ifdef L_thumb1_case_uqi

	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_uqi
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1
	ldrb	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r1}
	bx	lr
	SIZE (__gnu_thumb1_case_uqi)
#endif

#ifdef L_thumb1_case_shi

	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_shi
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1
	lsls	r1, r1, #1
	ldrsh	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
	SIZE (__gnu_thumb1_case_shi)
#endif

#ifdef L_thumb1_case_uhi

	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_uhi
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1
	lsls	r1, r1, #1
	ldrh	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
	SIZE (__gnu_thumb1_case_uhi)
#endif
#ifdef L_sync_none
/* Null implementation of __sync_synchronize, for use when
   it is known that the system is single threaded.  */

	FUNC_START sync_synchronize_none
	RET
	FUNC_END sync_synchronize_none
#endif

#ifdef L_sync_dmb
/* Full memory barrier using DMB.  Requires Armv7 (all profiles)
   or Armv6-M, or later.  */

#if __ARM_ARCH_PROFILE == 'M'
	.arch	armv6-m
#endif

	FUNC_START sync_synchronize_dmb
	/* M-profile devices only support SY as the synchronization level,
	   but that's probably what we want here anyway.  */
	dmb	sy
	RET
	FUNC_END sync_synchronize_dmb
#endif
#ifdef L_sync_cp15dmb
#ifndef NOT_ISA_TARGET_32BIT
/* Implementation of DMB using CP15 operations.  This was first
   defined in Armv6, but deprecated in Armv7 and can give
   sub-optimal performance.  */

	ARM_FUNC_START sync_synchronize_cp15dmb
	mcr	p15, 0, r0, c7, c10, 5
	RET
	FUNC_END sync_synchronize_cp15dmb
#endif
#endif
#ifdef L_sync_synchronize
/* Generic version of the synchronization primitive.  If we know
   that DMB exists, then use it.  Otherwise, arrange for a link
   time warning explaining how to pick a suitable alternative.
   We choose not to use CP15DMB because it is performance
   deprecated.  We only define this function if generating
   ELF binaries as otherwise we can't rely on the warning being
   generated.  */
#ifdef __ELF__

	FUNC_START sync_synchronize
#if __ARM_ARCH >= 7 || __ARM_ARCH_PROFILE == 'M'
	dmb	sy
#endif
	RET
	FUNC_END sync_synchronize

#if !(__ARM_ARCH >= 7 || __ARM_ARCH_PROFILE == 'M')
	.section	.gnu.warning.__sync_synchronize
	.align	2
	.ascii "This implementation of __sync_synchronize is a stub with "
	.ascii "no effect.  Relink with\n"
	.ascii " -specs=sync-{none,dmb,cp15dmb}.specs\n"
	.ascii "to specify exactly which barrier format to use and avoid "
	.ascii "this warning\0"
#endif

#endif /* __ELF__ */
#endif /* L_sync_synchronize */
#ifdef L_thumb1_case_si

	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_si
	push	{r0, r1}
	mov	r1, lr
	adds.n	r1, r1, #2	/* Align to word.  */
	lsrs	r1, r1, #2
	lsls	r0, r0, #2
	lsls	r1, r1, #2
	ldr	r0, [r1, r0]
	adds	r0, r0, r1
	mov	lr, r0
	pop	{r0, r1}
	mov	pc, lr		/* We know we were called from thumb code.  */
	SIZE (__gnu_thumb1_case_si)
#endif

#endif /* Arch supports thumb.  */
.macro	CFI_START_FUNCTION
	.cfi_sections	.debug_frame
	.cfi_startproc
.endm

.macro	CFI_END_FUNCTION
	.cfi_endproc
.endm
#ifndef __symbian__

/* The condition here must match the one in gcc/config/arm/elf.h and
   libgcc/config/arm/t-elf.  */
#ifndef NOT_ISA_TARGET_32BIT
#include "ieee754-df.S"
#include "ieee754-sf.S"
#include "bpabi.S"
#else /* NOT_ISA_TARGET_32BIT */
#include "bpabi-v6m.S"
#endif /* NOT_ISA_TARGET_32BIT */
#endif /* !__symbian__ */