i386: Move futex functions from lowlevellock.h to lowlevellock-futex.h.
[glibc.git] / sysdeps / arm / memmove.S
blob488e1562a300baf005e35bde2f4caa8795d9ff6f
1 /* Copyright (C) 2006-2014 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
4    Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library.  If not, see
18    <http://www.gnu.org/licenses/>.  */
20 /* Thumb requires excessive IT insns here.  */
21 #define NO_THUMB
22 #include <sysdep.h>
23 #include <arm-features.h>
25 /*
26  * Data preload for architectures that support it (ARM V5TE and above)
27  */
/* PLD(code) emits its argument verbatim unless the compiler announces one
   of the architectures listed below (ARM v2, v3, v3M, v4, v4T, v5, v5T);
   for those it expands to nothing, so every PLD(...) line in this file
   disappears from the build.  */
28 #if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
29      && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
30      && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
31      && !defined (__ARM_ARCH_5T__))
32 #define PLD(code...)    code
33 #else
/* No preload on the listed architectures: swallow the argument.  */
34 #define PLD(code...)
35 #endif
37 /*
38  * This can be used to enable code to cacheline align the source pointer.
39  * Experiments on tested architectures (StrongARM and XScale) didn't show
40  * this to be a worthwhile thing to do.  That might be different in the future.
41  */
/* CALGN(code) is defined to expand to nothing, so every CALGN(...) line
   in this file is compiled out.  The commented-out alternative directly
   below would emit the argument instead.  */
42 //#define CALGN(code...)        code
43 #define CALGN(code...)
45 /*
46  * Endian independent macros for shifting bytes within registers.
47  */
/* PULL and PUSH name the two shift directions used when a destination
   word is assembled from two neighbouring source words.  Without
   __ARMEB__ (little-endian build) PULL is a logical right shift and PUSH
   a logical left shift; with __ARMEB__ the two are swapped.  */
48 #ifndef __ARMEB__
49 #define PULL            lsr
50 #define PUSH            lsl
51 #else
52 #define PULL            lsl
53 #define PUSH            lsr
54 #endif
/* Emit what follows into the text section, written in unified
   assembler syntax.  */
56                 .text
57                 .syntax unified
59 /*
60  * Prototype: void *memmove(void *dest, const void *src, size_t n);
61  *
62  * Note:
63  *
64  * If the memory regions don't overlap, we simply branch to memcpy which is
65  * normally a bit faster. Otherwise the copy is done going downwards.
66  */
/* void *memmove (void *dest, const void *src, size_t n)

   Register use as visible in the code below:
     r0  destination pointer (base of every store); the entry value is
         pushed and popped back into r0 on return
     r1  source pointer (base of every load)
     r2  byte count, kept biased by the amounts subtracted below
     ip, r3, r4, lr (and r5-r8, r10 once pushed) are scratch

   If dest <= src, or n <= dest - src, control is handed to memcpy.
   Otherwise both pointers are advanced to the end of their buffers and
   the copy runs downwards (pre-decrement addressing throughout).

   sfi_breg, sfi_pld, the backslash-B placeholder, the cfi_* macros,
   HIDDEN_JUMPTARGET and ARM_BX_ALIGN_LOG2 are defined outside this file
   (presumably by the included headers).  From the usage here the
   placeholder stands for the register named as sfi_breg's first
   argument -- confirm against the macro definition.  */
68 ENTRY(memmove)
                /* ip = dest - src.  LS after this pair means either
                   dest <= src (cmphi skipped) or n <= dest - src.  */
70                 subs    ip, r0, r1
71                 cmphi   r2, ip
72 #if !IS_IN (libc)
73                 bls     memcpy
74 #else
75                 bls     HIDDEN_JUMPTARGET(memcpy)
76 #endif
                /* Save the entry value of r0 (reloaded on return), r4
                   and lr; the cfi_* lines track the 12 bytes pushed.  */
78                 push    {r0, r4, lr}
79                 cfi_adjust_cfa_offset (12)
80                 cfi_rel_offset (r4, 4)
81                 cfi_rel_offset (lr, 8)
                /* Remembered here, restored after the return sequence
                   for the code at labels 9 and onwards.  */
83                 cfi_remember_state
                /* Point r1 and r0 one past the end of source and
                   destination; r2 becomes n - 4.  */
85                 add     r1, r1, r2
86                 add     r0, r0, r2
87                 subs    r2, r2, #4
                /* Fewer than 4 bytes: only the byte tail at 8 runs.  */
88                 blt     8f
                /* Destination end not word aligned: fix up at 9.  */
89                 ands    ip, r0, #3
90         PLD(    sfi_pld r1, #-4                 )
91                 bne     9f
                /* Source end not word aligned (destination is):
                   shifted copy at 10, with ip = r1 & 3.  */
92                 ands    ip, r1, #3
93                 bne     10f
                /* Both ends word aligned.  r2 becomes n - 32; negative
                   means no whole 32-byte block is left.  */
95 1:              subs    r2, r2, #(28)
96                 push    {r5 - r8}
97                 cfi_adjust_cfa_offset (16)
98                 cfi_rel_offset (r5, 0)
99                 cfi_rel_offset (r6, 4)
100                 cfi_rel_offset (r7, 8)
101                 cfi_rel_offset (r8, 12)
102                 blt     5f
                /* Optional cache-line alignment of the source; compiled
                   out while CALGN expands to nothing.  */
104         CALGN(  ands    ip, r1, #31             )
105         CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
106         CALGN(  bcs     2f                      )
107         CALGN(  adr     r4, 6f                  )
108         CALGN(  subs    r2, r2, ip              )  @ C is set here
109 #ifndef ARM_ALWAYS_BX
110         CALGN(  add     pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
111 #else
112         CALGN(  add     r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
113         CALGN(  bx      r4                      )
114 #endif
                /* With PLD enabled r2 is biased by a further 96, so the
                   loop at 3 (which preloads at offset -128) ends early
                   and the last blocks run from 4 without preloading
                   (see the cmn/bge pair after the loop).  */
116         PLD(    sfi_pld r1, #-4                 )
117 2:      PLD(    subs    r2, r2, #96             )
118         PLD(    sfi_pld r1, #-32                )
119         PLD(    blt     4f                      )
120         PLD(    sfi_pld r1, #-64                )
121         PLD(    sfi_pld r1, #-96                )
                /* Main loop: move 32 bytes per iteration through eight
                   registers, both pointers decrementing.  */
123 3:      PLD(    sfi_pld r1, #-128               )
124 4:              sfi_breg r1, \
125                 ldmdb   \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
126                 subs    r2, r2, #32
127                 sfi_breg r0, \
128                 stmdb   \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
129                 bge     3b
130         PLD(    cmn     r2, #96                 )
131         PLD(    bge     4b                      )
                /* Fewer than 32 bytes left.  r2 & 28 is the number of
                   bytes still to move as whole words (0, 4, ..., 28);
                   ip = 32 - that.  When the count is non-zero, jump
                   into the run of single-word loads below, skipping
                   ip / 4 alignment slots so that only the required
                   loads execute.  Slot 0 (label 6) is a nop.  */
133 5:              ands    ip, r2, #28
134                 rsb     ip, ip, #32
135 #ifndef ARM_ALWAYS_BX
136                 /* C is always clear here.  */
137                 addne   pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
138                 b       7f
139 #else
                /* Same dispatch done through bx; r10 is borrowed as the
                   jump register and restored after the store run.  */
140                 beq     7f
141                 push    {r10}
142                 cfi_adjust_cfa_offset (4)
143                 cfi_rel_offset (r10, 0)
144                 add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
145                 bx      r10
146 #endif
                /* Load run: one instruction per slot, each slot padded
                   to 2^ARM_BX_ALIGN_LOG2 bytes so the slots are
                   uniformly sized for the computed jump.  */
147                 .p2align ARM_BX_ALIGN_LOG2
148 6:              nop
149                 .p2align ARM_BX_ALIGN_LOG2
150                 sfi_breg r1, \
151                 ldr     r3, [\B, #-4]!
152                 .p2align ARM_BX_ALIGN_LOG2
153                 sfi_breg r1, \
154                 ldr     r4, [\B, #-4]!
155                 .p2align ARM_BX_ALIGN_LOG2
156                 sfi_breg r1, \
157                 ldr     r5, [\B, #-4]!
158                 .p2align ARM_BX_ALIGN_LOG2
159                 sfi_breg r1, \
160                 ldr     r6, [\B, #-4]!
161                 .p2align ARM_BX_ALIGN_LOG2
162                 sfi_breg r1, \
163                 ldr     r7, [\B, #-4]!
164                 .p2align ARM_BX_ALIGN_LOG2
165                 sfi_breg r1, \
166                 ldr     r8, [\B, #-4]!
167                 .p2align ARM_BX_ALIGN_LOG2
168                 sfi_breg r1, \
169                 ldr     lr, [\B, #-4]!
                /* Second dispatch with the same ip: enter the matching
                   position of the store run.  */
171 #ifndef ARM_ALWAYS_BX
172                 add     pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
173                 nop
174 #else
175                 add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
176                 bx      r10
177 #endif
                /* Store run, laid out slot for slot like the loads.  */
178                 .p2align ARM_BX_ALIGN_LOG2
179                 nop
180                 .p2align ARM_BX_ALIGN_LOG2
181                 sfi_breg r0, \
182                 str     r3, [\B, #-4]!
183                 .p2align ARM_BX_ALIGN_LOG2
184                 sfi_breg r0, \
185                 str     r4, [\B, #-4]!
186                 .p2align ARM_BX_ALIGN_LOG2
187                 sfi_breg r0, \
188                 str     r5, [\B, #-4]!
189                 .p2align ARM_BX_ALIGN_LOG2
190                 sfi_breg r0, \
191                 str     r6, [\B, #-4]!
192                 .p2align ARM_BX_ALIGN_LOG2
193                 sfi_breg r0, \
194                 str     r7, [\B, #-4]!
195                 .p2align ARM_BX_ALIGN_LOG2
196                 sfi_breg r0, \
197                 str     r8, [\B, #-4]!
198                 .p2align ARM_BX_ALIGN_LOG2
199                 sfi_breg r0, \
200                 str     lr, [\B, #-4]!
                /* Undo the r10 push made for the bx dispatch.  */
202 #ifdef ARM_ALWAYS_BX
203                 pop     {r10}
204                 cfi_adjust_cfa_offset (-4)
205                 cfi_restore (r10)
206 #endif
208         CALGN(  bcs     2b                      )
                /* Word copying done: release r5-r8.  */
210 7:              pop     {r5 - r8}
211                 cfi_adjust_cfa_offset (-16)
212                 cfi_restore (r5)
213                 cfi_restore (r6)
214                 cfi_restore (r7)
215                 cfi_restore (r8)
                /* Byte tail (0-3 bytes).  Shifting r2 left by 31 leaves
                   bit 0 of the count as the only result bit (NE: copy
                   one byte) and moves bit 1 into the carry (CS: copy
                   two bytes).  The last load/store of the CS pair does
                   not write back because nothing follows it.  */
217 8:              movs    r2, r2, lsl #31
218                 sfi_breg r1, \
219                 ldrbne  r3, [\B, #-1]!
220                 sfi_breg r1, \
221                 ldrbcs  r4, [\B, #-1]!
222                 sfi_breg r1, \
223                 ldrbcs  ip, [\B, #-1]
224                 sfi_breg r0, \
225                 strbne  r3, [\B, #-1]!
226                 sfi_breg r0, \
227                 strbcs  r4, [\B, #-1]!
228                 sfi_breg r0, \
229                 strbcs  ip, [\B, #-1]
                /* Return.  r0 receives the value pushed at entry (the
                   original dest).  The first variant returns with bx lr,
                   the second pops the saved lr straight into pc.  */
231 #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
232      || defined (ARM_ALWAYS_BX))
233                 pop     {r0, r4, lr}
234                 cfi_adjust_cfa_offset (-12)
235                 cfi_restore (r4)
236                 cfi_restore (lr)
237                 bx      lr
238 #else
239                 pop     {r0, r4, pc}
240 #endif
                /* The code below is reached only by the branches to 9
                   and 10 taken after the entry push, so reinstate the
                   CFI state remembered there.  */
242                 cfi_restore_state
                /* Destination end misaligned: ip = r0 & 3 (1..3).  Copy
                   ip bytes downwards (GT: ip == 3, GE: ip >= 2, plus one
                   unconditional byte) so that r0 becomes word aligned,
                   and take them off the count.  */
244 9:              cmp     ip, #2
245                 sfi_breg r1, \
246                 ldrbgt  r3, [\B, #-1]!
247                 sfi_breg r1, \
248                 ldrbge  r4, [\B, #-1]!
249                 sfi_breg r1, \
250                 ldrb    lr, [\B, #-1]!
251                 sfi_breg r0, \
252                 strbgt  r3, [\B, #-1]!
253                 sfi_breg r0, \
254                 strbge  r4, [\B, #-1]!
255                 subs    r2, r2, ip
256                 sfi_breg r0, \
257                 strb    lr, [\B, #-1]!
                /* Fewer than 4 bytes left: byte tail only.  */
258                 blt     8b
                /* Source now word aligned too: aligned path at 1;
                   otherwise fall through with ip = r1 & 3.  */
259                 ands    ip, r1, #3
260                 beq     1b
                /* Source misaligned by ip (1..3) relative to the word
                   aligned destination.  Round r1 down to a word
                   boundary, preload that word into r3 and pick the
                   macro expansion whose shift amounts match ip:
                   ip == 3 falls through (push=8, pull=24),
                   ip == 2 goes to 17 (push=16, pull=16),
                   ip == 1 goes to 18 (push=24, pull=8).  */
262 10:             bic     r1, r1, #3
263                 cmp     ip, #2
264                 sfi_breg r1, \
265                 ldr     r3, [\B, #0]
266                 beq     17f
267                 blt     18f
                /* backward_copy_shift push pull
                   Downward word copy from a source that is misaligned
                   relative to the destination.  r3 always holds the most
                   recently loaded (lowest so far) source word.  Every
                   stored word is the previous source word shifted with
                   PUSH by the push amount, OR-ed with the next lower
                   source word shifted with PULL by the pull amount.
                   The numeric labels are local, so the three expansions
                   below do not clash.  */
270                 .macro  backward_copy_shift push pull
                /* r2 becomes (remaining - 32); negative means no whole
                   32-byte block is left.  */
272                 subs    r2, r2, #28
273                 blt     14f
275         CALGN(  ands    ip, r1, #31             )
276         CALGN(  rsb     ip, ip, #32             )
277         CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
278         CALGN(  subcc   r2, r2, ip              )
279         CALGN(  bcc     15f                     )
                /* The block loop needs five more registers.  */
281 11:             push    {r5 - r8, r10}
282                 cfi_adjust_cfa_offset (20)
283                 cfi_rel_offset (r5, 0)
284                 cfi_rel_offset (r6, 4)
285                 cfi_rel_offset (r7, 8)
286                 cfi_rel_offset (r8, 12)
287                 cfi_rel_offset (r10, 16)
289         PLD(    sfi_pld r1, #-4                 )
290         PLD(    subs    r2, r2, #96             )
291         PLD(    sfi_pld r1, #-32                )
292         PLD(    blt     13f                     )
293         PLD(    sfi_pld r1, #-64                )
294         PLD(    sfi_pld r1, #-96                )
                /* Block loop: load eight source words in two groups of
                   four, merge each with its upper neighbour (starting
                   from the carried-in r3), then store eight destination
                   words.  r3 is reloaded by the second group and carries
                   over to the next iteration.  */
296 12:     PLD(    sfi_pld r1, #-128               )
297 13:             sfi_breg r1, \
298                 ldmdb   \B!, {r7, r8, r10, ip}
299                 mov     lr, r3, PUSH #\push
300                 subs    r2, r2, #32
301                 sfi_breg r1, \
302                 ldmdb   \B!, {r3, r4, r5, r6}
303                 orr     lr, lr, ip, PULL #\pull
304                 mov     ip, ip, PUSH #\push
305                 orr     ip, ip, r10, PULL #\pull
306                 mov     r10, r10, PUSH #\push
307                 orr     r10, r10, r8, PULL #\pull
308                 mov     r8, r8, PUSH #\push
309                 orr     r8, r8, r7, PULL #\pull
310                 mov     r7, r7, PUSH #\push
311                 orr     r7, r7, r6, PULL #\pull
312                 mov     r6, r6, PUSH #\push
313                 orr     r6, r6, r5, PULL #\pull
314                 mov     r5, r5, PUSH #\push
315                 orr     r5, r5, r4, PULL #\pull
316                 mov     r4, r4, PUSH #\push
317                 orr     r4, r4, r3, PULL #\pull
318                 sfi_breg r0, \
319                 stmdb   \B!, {r4 - r8, r10, ip, lr}
320                 bge     12b
321         PLD(    cmn     r2, #96                 )
322         PLD(    bge     13b                     )
324                 pop     {r5 - r8, r10}
325                 cfi_adjust_cfa_offset (-20)
326                 cfi_restore (r5)
327                 cfi_restore (r6)
328                 cfi_restore (r7)
329                 cfi_restore (r8)
330                 cfi_restore (r10)
                /* ip = bytes still to move as whole words (0..28);
                   none left means go straight to the pointer fix-up.  */
332 14:             ands    ip, r2, #28
333                 beq     16f
                /* One merged word per iteration, counting ip down by 4
                   until it reaches zero.  */
335 15:             mov     lr, r3, PUSH #\push
336                 sfi_breg r1, \
337                 ldr     r3, [\B, #-4]!
338                 subs    ip, ip, #4
339                 orr     lr, lr, r3, PULL #\pull
340                 sfi_breg r0, \
341                 str     lr, [\B, #-4]!
342                 bgt     15b
343         CALGN(  cmp     r2, #0                  )
344         CALGN(  bge     11b                     )
                /* r1 was rounded down to a word boundary before the
                   macro was entered; add back pull / 8 bytes (3, 2 or 1
                   for pull = 24, 16, 8, i.e. the original misalignment)
                   so the byte tail at 8 reads from the true position.  */
346 16:             add     r1, r1, #(\pull / 8)
347                 b       8b
349                 .endm
                /* Three expansions, one per source misalignment; the
                   first is the fall-through case from label 10.  */
352                 backward_copy_shift     push=8  pull=24
354 17:             backward_copy_shift     push=16 pull=16
356 18:             backward_copy_shift     push=24 pull=8
359 END(memmove)
/* libc_hidden_builtin_def is defined outside this file; presumably it
   provides the hidden alias used for calls from inside libc -- confirm
   against its definition.  */
360 libc_hidden_builtin_def (memmove)