1 /* Copyright (C) 2006-2017 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library. If not, see
18 <http://www.gnu.org/licenses/>. */
20 /* Thumb requires excessive IT insns here. */
23 #include <arm-features.h>
/*
 * Data preload for architectures that support it (ARM V5TE and above).
 */
/* PLD(code): emit the enclosed preload code only when the target
   architecture has the PLD hint, i.e. when none of the pre-v5TE
   architecture macros below is defined.
   NOTE(review): the matching #else / #endif (presumably defining PLD to
   expand to nothing on older cores) is not visible in this excerpt --
   confirm against the full file.  */
28 #if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
29 && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
30 && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
31 && !defined (__ARM_ARCH_5T__))
32 #define PLD(code...) code
/*
 * This can be used to enable code to cacheline align the source pointer.
 * Experiments on tested architectures (StrongARM and XScale) didn't show
 * this a worthwhile thing to do.  That might be different in the future.
 */
/* CALGN(code): hook for source cache-line alignment code.  The first
   (commented-out) definition would emit the enclosed code; the active
   definition expands to nothing, so all CALGN(...) lines below are
   disabled.  */
42 //#define CALGN(code...) code
43 #define CALGN(code...)
/*
 * Endian independent macros for shifting bytes within registers.
 */
/*
 * Prototype: void *memmove(void *dest, const void *src, size_t n);
 *
 * If the memory regions don't overlap, we simply branch to memcpy which is
 * normally a bit faster.  Otherwise the copy is done going downwards.
 */
/* memmove body (excerpt).  Per AAPCS: r0 = dest, r1 = src, r2 = n.
   When a forward memcpy is safe (checked just above, not visible here)
   control branches to memcpy; otherwise the copy proceeds DOWNWARDS,
   from the end of both buffers toward the start, which is correct for
   overlapping regions with dest above src.
   NOTE(review): ENTRY(memmove), the register pushes, several branch
   targets, #else/#endif lines and the copy-stub bodies are missing from
   this excerpt -- comments below describe only the visible lines.  */
75 bls HIDDEN_JUMPTARGET(memcpy)
/* CFI for the 12-byte push (r0, r4, lr per the offsets below) done by
   the not-visible instruction just above.  */
79 cfi_adjust_cfa_offset (12)
80 cfi_rel_offset (r4, 4)
81 cfi_rel_offset (lr, 8)
/* Preload near the (already end-adjusted) source; offsets are negative
   because the copy walks downwards.  */
90 PLD( sfi_pld r1, #-4 )
/* CFI for a further 16-byte push of r5-r8 (offsets below).  */
97 cfi_adjust_cfa_offset (16)
98 cfi_rel_offset (r5, 0)
99 cfi_rel_offset (r6, 4)
100 cfi_rel_offset (r7, 8)
101 cfi_rel_offset (r8, 12)
/* Optional source cache-line (32-byte) alignment; dead code while CALGN
   expands to nothing.  ip = src & 31; the computed branch below jumps
   into the aligned stub ladder to copy the leading odd words.  */
104 CALGN( ands ip, r1, #31 )
105 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
108 CALGN( subs r2, r2, ip ) @ C is set here
109 #ifndef ARM_ALWAYS_BX
/* Computed jump: scale the misalignment into the stub ladder, whose
   entries are 1 << ARM_BX_ALIGN_LOG2 bytes apart.  */
110 CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
/* ARM_ALWAYS_BX variant: build the target address in r4 instead of
   writing pc directly (BX-only interworking).  */
112 CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
/* Preload ramp for the main loop: prime up to 4 cachelines ahead
   (behind, in address terms) of the descending copy; r2 is temporarily
   decremented by 96 to bound the lookahead near the buffer start.  */
116 PLD( sfi_pld r1, #-4 )
117 2: PLD( subs r2, r2, #96 )
118 PLD( sfi_pld r1, #-32 )
120 PLD( sfi_pld r1, #-64 )
121 PLD( sfi_pld r1, #-96 )
/* Main loop: 32 bytes per iteration.  Load 8 words from src and store
   8 words to dest, both with pre-decrement and writeback (descending).
   \B is the sfi_breg-verified base register (sandboxing wrapper, whose
   invocation lines are not visible here).  */
123 3: PLD( sfi_pld r1, #-128 )
125 ldmdb \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
128 stmdb \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
/* Tail dispatch: branch into a ladder of word-copy stubs, one per
   remaining word count.  Each stub is padded to 1 << ARM_BX_ALIGN_LOG2
   bytes so the scaled add lands on a stub boundary.  */
135 #ifndef ARM_ALWAYS_BX
136 /* C is always clear here. */
137 addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
/* ARM_ALWAYS_BX path: r10 (pushed here, CFI below) holds the computed
   stub address for a BX, since writing pc directly is not allowed.  */
142 cfi_adjust_cfa_offset (4)
143 cfi_rel_offset (r10, 0)
144 0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
145 /* If alignment is not perfect, then there will be some
146 padding (nop) instructions between this BX and label 6.
147 The computation above assumed that two instructions
148 later is exactly the right spot. */
149 add r10, #(6f - (0b + PC_OFS))
/* Load-stub ladder: eight aligned slots (stub bodies not visible in
   this excerpt).  */
152 .p2align ARM_BX_ALIGN_LOG2
154 .p2align ARM_BX_ALIGN_LOG2
157 .p2align ARM_BX_ALIGN_LOG2
160 .p2align ARM_BX_ALIGN_LOG2
163 .p2align ARM_BX_ALIGN_LOG2
166 .p2align ARM_BX_ALIGN_LOG2
169 .p2align ARM_BX_ALIGN_LOG2
172 .p2align ARM_BX_ALIGN_LOG2
/* Second computed dispatch, into the matching store-stub ladder.  */
176 #ifndef ARM_ALWAYS_BX
177 add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
180 0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
181 /* If alignment is not perfect, then there will be some
182 padding (nop) instructions between this BX and label 66.
183 The computation above assumed that two instructions
184 later is exactly the right spot. */
185 add r10, #(66f - (0b + PC_OFS))
/* Store-stub ladder: eight aligned slots mirroring the loads above.  */
188 .p2align ARM_BX_ALIGN_LOG2
190 .p2align ARM_BX_ALIGN_LOG2
193 .p2align ARM_BX_ALIGN_LOG2
196 .p2align ARM_BX_ALIGN_LOG2
199 .p2align ARM_BX_ALIGN_LOG2
202 .p2align ARM_BX_ALIGN_LOG2
205 .p2align ARM_BX_ALIGN_LOG2
208 .p2align ARM_BX_ALIGN_LOG2
/* Unwind the ARM_ALWAYS_BX r10 push and the r5-r8 push.  */
214 cfi_adjust_cfa_offset (-4)
221 cfi_adjust_cfa_offset (-16)
/* Final 1-3 bytes: lsl #31 moves bit 0 of the remaining count into N
   and bit 1 into C, so the ne loads/stores move one byte and the cs
   pair moves two.  NOTE(review): the second ldrb/strb of the cs pair is
   not visible in this excerpt.  */
227 8: movs r2, r2, lsl #31
229 ldrbne r3, [\B, #-1]!
231 ldrbcs r4, [\B, #-1]!
235 strbne r3, [\B, #-1]!
237 strbcs r4, [\B, #-1]!
/* Return: use BX (rather than popping into pc) when interworking with
   Thumb on ARMv4T, or whenever BX is mandatory.  */
241 #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
242 || defined (ARM_ALWAYS_BX))
244 cfi_adjust_cfa_offset (-12)
/* Unaligned-destination entry: copy the leading 1-3 bytes one at a time
   (gt/ge conditions from a count comparison not visible here) until the
   destination is word aligned, then rejoin the word-copy path.  */
256 ldrbgt r3, [\B, #-1]!
258 ldrbge r4, [\B, #-1]!
262 strbgt r3, [\B, #-1]!
264 strbge r4, [\B, #-1]!
/* backward_copy_shift push pull -- descending copy for a source whose
   word alignment differs from the destination's by \push/8 bytes
   (\push + \pull == 32).  Each destination word is assembled in
   registers from two adjacent source words: \push bits from one and
   \pull bits from the next, using the endian-independent PUSH/PULL
   shift macros.  Instantiated three times below for misalignments of
   1, 2 and 3 bytes.
   NOTE(review): the .endm, the loop branches, and the dispatch that
   selects among the three instantiations are not visible in this
   excerpt.  */
280 .macro backward_copy_shift push pull
/* Optional source cache-line alignment (inactive while CALGN is
   empty): ip = bytes needed to reach a 32-byte boundary.  */
285 CALGN( ands ip, r1, #31 )
286 CALGN( rsb ip, ip, #32 )
287 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
288 CALGN( subcc r2, r2, ip )
/* Save the extra working registers needed for the 8-words-per-iteration
   loop; CFI mirrors the 20-byte push.  */
291 11: push {r5 - r8, r10}
292 cfi_adjust_cfa_offset (20)
293 cfi_rel_offset (r5, 0)
294 cfi_rel_offset (r6, 4)
295 cfi_rel_offset (r7, 8)
296 cfi_rel_offset (r8, 12)
297 cfi_rel_offset (r10, 16)
/* Preload ramp, as in the aligned path: negative offsets because the
   copy is descending; r2 is temporarily reduced by 96 to bound the
   lookahead.  */
299 PLD( sfi_pld r1, #-4 )
300 PLD( subs r2, r2, #96 )
301 PLD( sfi_pld r1, #-32 )
303 PLD( sfi_pld r1, #-64 )
304 PLD( sfi_pld r1, #-96 )
/* Main loop: read 8 source words (two ldmdb batches, descending with
   writeback), then merge each adjacent pair -- r3 is the word carried
   in from the previous iteration -- into 8 aligned destination words.
   mov X, X, PUSH #\push keeps X's contribution; orr ... PULL #\pull
   fills the remaining bits from the next-lower source word.  */
306 12: PLD( sfi_pld r1, #-128 )
308 ldmdb \B!, {r7, r8, r10, ip}
309 mov lr, r3, PUSH #\push
312 ldmdb \B!, {r3, r4, r5, r6}
313 orr lr, lr, ip, PULL #\pull
314 mov ip, ip, PUSH #\push
315 orr ip, ip, r10, PULL #\pull
316 mov r10, r10, PUSH #\push
317 orr r10, r10, r8, PULL #\pull
318 mov r8, r8, PUSH #\push
319 orr r8, r8, r7, PULL #\pull
320 mov r7, r7, PUSH #\push
321 orr r7, r7, r6, PULL #\pull
322 mov r6, r6, PUSH #\push
323 orr r6, r6, r5, PULL #\pull
324 mov r5, r5, PUSH #\push
325 orr r5, r5, r4, PULL #\pull
326 mov r4, r4, PUSH #\push
327 orr r4, r4, r3, PULL #\pull
/* Store the 8 assembled words in one descending burst.  */
329 stmdb \B!, {r4 - r8, r10, ip, lr}
/* Restore the 20-byte push from label 11 above.  */
335 cfi_adjust_cfa_offset (-20)
/* Word-at-a-time tail: same merge of r3 with the next source word.  */
345 15: mov lr, r3, PUSH #\push
349 orr lr, lr, r3, PULL #\pull
/* Re-adjust src past the partial word that was read but only partially
   consumed (\pull / 8 bytes).  */
356 16: add r1, r1, #(\pull / 8)
/* Three concrete shift variants, one per source misalignment
   (1, 2 and 3 bytes respectively).  */
362 backward_copy_shift push=8 pull=24
364 17: backward_copy_shift push=16 pull=16
366 18: backward_copy_shift push=24 pull=8
/* Export memmove for libc-internal callers without PLT indirection.  */
370 libc_hidden_builtin_def (memmove)