Fix linknamespace parallel test failures.
[glibc.git] / sysdeps / arm / memcpy.S
blobdb8ba50af35ad21294c76cbfe9f857210449faa0
1 /* Copyright (C) 2006-2016 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
4    Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library.  If not, see
18    <http://www.gnu.org/licenses/>.  */
20 /* Thumb requires excessive IT insns here.  */
21 #define NO_THUMB
22 #include <sysdep.h>
23 #include <arm-features.h>
25 /*
26  * Data preload for architectures that support it (ARM V5TE and above)
27  */
/* PLD(code...) wraps the preload instructions used by the copy loops.
   When none of the architecture macros tested below (ARMv2 through
   ARMv5T) is defined, PLD expands to its argument unchanged.  On those
   older targets it expands to nothing, so the wrapped instruction is
   left out of the assembled code.  */
28 #if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
29      && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
30      && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
31      && !defined (__ARM_ARCH_5T__))
32 #define PLD(code...)    code
33 #else
34 #define PLD(code...)
35 #endif
37 /*
38  * This can be used to enable code to cacheline align the source pointer.
39  * Experiments on tested architectures (StrongARM and XScale) didn't show
40  * this a worthwhile thing to do.  That might be different in the future.
41  */
/* CALGN(code...) guards the optional source cache-line alignment code.
   The active definition discards its argument, so every CALGN( ... )
   line in this file assembles to nothing.  The commented-out definition
   is the one that would pass the wrapped instruction through.  */
42 //#define CALGN(code...)        code
43 #define CALGN(code...)
45 /*
46  * Endian independent macros for shifting bytes within registers.
47  */
/* PULL and PUSH name the two shift operators used when one output word
   is assembled from two adjacent source words.  Without __ARMEB__
   (little-endian) PULL is a logical right shift and PUSH a left shift;
   with __ARMEB__ (big-endian) the two are swapped, so the same source
   text works for either byte order.  */
48 #ifndef __ARMEB__
49 #define PULL            lsr
50 #define PUSH            lsl
51 #else
52 #define PULL            lsl
53 #define PUSH            lsr
54 #endif
/* Emit what follows into the code section and assemble it using the
   unified ARM/Thumb assembler syntax.  */
56                 .text
57                 .syntax unified
59 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
/* memcpy: copy r2 bytes from the buffer at r1 to the buffer at r0 and
   return the value r0 had on entry.

   Register use in this routine:
     r0   destination pointer (store base, post-incremented); its entry
          value is pushed first and popped back into r0 on exit
     r1   source pointer (load base, post-incremented)
     r2   byte count, kept biased below the true remaining count so the
          flags from each subs answer "is another full unit left?"
     r3-r8, r10, ip, lr   data and scratch; r4-r8, r10 and lr are saved
          on the stack before they are used

   Local labels:
     1    both pointers word aligned: 32-byte block copy
     5    copy the 0-7 whole words left after the block loop
     7/8  restore registers, copy the last 0-3 bytes, return
     9    destination not word aligned: copy 1-3 bytes to align it
     10   source still not word aligned: dispatch to the shifting copy

   sfi_breg, sfi_pld, the cfi_* helpers, ARM_BX_ALIGN_LOG2 and PC_OFS
   are defined outside this file.  */
61 ENTRY(memcpy)
                /* Save the destination (the eventual return value) together
                   with r4 and lr.  The CFI lines record the 12-byte stack
                   adjustment and where r4 and lr were stored.  */
63                 push    {r0, r4, lr}
64                 cfi_adjust_cfa_offset (12)
65                 cfi_rel_offset (r4, 4)
66                 cfi_rel_offset (lr, 8)
                /* Snapshot the unwind state; cfi_restore_state after the
                   return sequence reinstates it for labels 9 and 10, which
                   are entered with only these three registers pushed.  */
68                 cfi_remember_state
                /* r2 = n - 4.  Negative means fewer than 4 bytes in total:
                   go straight to the byte tail at 8.  */
70                 subs    r2, r2, #4
71                 blt     8f
                /* ip = dest & 3.  Nonzero: destination is not word aligned,
                   handled at 9.  The preload of the source sits between
                   the test and its branch.  */
72                 ands    ip, r0, #3
73         PLD(    sfi_pld r1, #0                  )
74                 bne     9f
                /* Destination is aligned.  ip = src & 3; nonzero means the
                   source is misaligned relative to it, handled at 10.  */
75                 ands    ip, r1, #3
76                 bne     10f
                /* Both pointers word aligned.  r2 drops by another 28, so it
                   is now (bytes remaining - 32).  r5-r8 are saved so that
                   eight registers can be moved per iteration.  The push does
                   not touch the flags, so blt still tests the subs: fewer
                   than 32 bytes left skips the block loop.  */
78 1:              subs    r2, r2, #(28)
79                 push    {r5 - r8}
80                 cfi_adjust_cfa_offset (16)
81                 cfi_rel_offset (r5, 0)
82                 cfi_rel_offset (r6, 4)
83                 cfi_rel_offset (r7, 8)
84                 cfi_rel_offset (r8, 12)
85                 blt     5f
                /* Optional source cache-line alignment.  CALGN() is defined
                   earlier in this file to expand to nothing, so none of the
                   CALGN lines below are assembled.  */
87         CALGN(  ands    ip, r1, #31             )
88         CALGN(  rsb     r3, ip, #32             )
89         CALGN(  sbcsne  r4, r3, r2              )  @ C is always set here
90         CALGN(  bcs     2f                      )
91         CALGN(  adr     r4, 6f                  )
92         CALGN(  subs    r2, r2, r3              )  @ C gets set
93 #ifndef ARM_ALWAYS_BX
94         CALGN(  add     pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
95 #else
96         CALGN(  add     r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
97         CALGN(  bx      r4                      )
98 #endif
                /* When PLD is enabled, r2 is biased by a further 96.  The
                   loop entered at 3 then keeps issuing preloads only while
                   r2 stays non-negative; with fewer bytes left the blt
                   skips the two extra preloads and enters the loop at 4.  */
100         PLD(    sfi_pld r1, #0                  )
101 2:      PLD(    subs    r2, r2, #96             )
102         PLD(    sfi_pld r1, #28                 )
103         PLD(    blt     4f                      )
104         PLD(    sfi_pld r1, #60                 )
105         PLD(    sfi_pld r1, #92                 )
                /* Block loop: move 32 bytes (eight words) per iteration.
                   The loads and stores leave the flags alone, so bge tests
                   the subs.  */
107 3:      PLD(    sfi_pld r1, #124                )
108 4:              sfi_breg r1, \
109                 ldmia   \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
110                 subs    r2, r2, #32
111                 sfi_breg r0, \
112                 stmia   \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
113                 bge     3b
                /* PLD only: r2 + 96 >= 0 means whole 32-byte blocks remain;
                   copy them through 4, without the preload at 3.  */
114         PLD(    cmn     r2, #96                 )
115         PLD(    bge     4b                      )
                /* Fewer than 32 bytes remain.  Everything subtracted from r2
                   so far adds up to a multiple of 32 beyond what was copied,
                   so r2 & 28 is the byte size of the 0-7 whole words still to
                   copy.  ip = 32 - that value.  rsb leaves the flags alone,
                   so Z still reflects the ands: Z set means no whole words.  */
117 5:              ands    ip, r2, #28
118                 rsb     ip, ip, #32
119 #ifndef ARM_ALWAYS_BX
120                 /* C is always clear here.  */
                /* Computed jump into the load ladder.  pc reads as two
                   instructions ahead of the add.  Each ladder slot is
                   aligned to 1 << ARM_BX_ALIGN_LOG2 bytes, and ip shifted by
                   (ARM_BX_ALIGN_LOG2 - 2) equals (8 - words) such units
                   (assuming one unit per slot), so the jump skips the nop
                   slot and the loads that are not needed.  With no whole
                   words left the add is not executed and b 7f is taken.  */
121                 addne   pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
122                 b       7f
123 #else
                /* Same dispatch, but the target is built in r10 (saved here)
                   and reached with bx instead of writing pc directly.  */
124                 beq     7f
125                 push    {r10}
126                 cfi_adjust_cfa_offset (4)
127                 cfi_rel_offset (r10, 0)
128 0:              add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
129                 /* If alignment is not perfect, then there will be some
130                    padding (nop) instructions between this BX and label 6.
131                    The computation above assumed that two instructions
132                    later is exactly the right spot.  */
133                 add     r10, #(6f - (0b + PC_OFS))
134                 bx      r10
135 #endif
                /* Load ladder: a nop slot followed by seven single-word
                   loads, each placed on its own aligned slot.  It is entered
                   part-way down by the computed jump above.  */
136                 .p2align ARM_BX_ALIGN_LOG2
137 6:              nop
138                 .p2align ARM_BX_ALIGN_LOG2
139                 sfi_breg r1, \
140                 ldr     r3, [\B], #4
141                 .p2align ARM_BX_ALIGN_LOG2
142                 sfi_breg r1, \
143                 ldr     r4, [\B], #4
144                 .p2align ARM_BX_ALIGN_LOG2
145                 sfi_breg r1, \
146                 ldr     r5, [\B], #4
147                 .p2align ARM_BX_ALIGN_LOG2
148                 sfi_breg r1, \
149                 ldr     r6, [\B], #4
150                 .p2align ARM_BX_ALIGN_LOG2
151                 sfi_breg r1, \
152                 ldr     r7, [\B], #4
153                 .p2align ARM_BX_ALIGN_LOG2
154                 sfi_breg r1, \
155                 ldr     r8, [\B], #4
156                 .p2align ARM_BX_ALIGN_LOG2
157                 sfi_breg r1, \
158                 ldr     lr, [\B], #4
                /* ip was not written by the ladder above.  The same offset
                   now selects the matching slot in the store ladder, so
                   exactly the registers just loaded are stored.  */
160 #ifndef ARM_ALWAYS_BX
161                 add     pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
162                 nop
163 #else
164 0:              add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
165                 /* If alignment is not perfect, then there will be some
166                    padding (nop) instructions between this BX and label 66.
167                    The computation above assumed that two instructions
168                    later is exactly the right spot.  */
169                 add     r10, #(66f - (0b + PC_OFS))
170                 bx      r10
171 #endif
                /* Store ladder, laid out slot for slot like the load
                   ladder.  */
172                 .p2align ARM_BX_ALIGN_LOG2
173 66:             nop
174                 .p2align ARM_BX_ALIGN_LOG2
175                 sfi_breg r0, \
176                 str     r3, [\B], #4
177                 .p2align ARM_BX_ALIGN_LOG2
178                 sfi_breg r0, \
179                 str     r4, [\B], #4
180                 .p2align ARM_BX_ALIGN_LOG2
181                 sfi_breg r0, \
182                 str     r5, [\B], #4
183                 .p2align ARM_BX_ALIGN_LOG2
184                 sfi_breg r0, \
185                 str     r6, [\B], #4
186                 .p2align ARM_BX_ALIGN_LOG2
187                 sfi_breg r0, \
188                 str     r7, [\B], #4
189                 .p2align ARM_BX_ALIGN_LOG2
190                 sfi_breg r0, \
191                 str     r8, [\B], #4
192                 .p2align ARM_BX_ALIGN_LOG2
193                 sfi_breg r0, \
194                 str     lr, [\B], #4
                /* Give back the r10 that the bx-based dispatch saved.  */
196 #ifdef ARM_ALWAYS_BX
197                 pop     {r10}
198                 cfi_adjust_cfa_offset (-4)
199                 cfi_restore (r10)
200 #endif
202         CALGN(  bcs     2b                      )
                /* All whole words are copied; release r5-r8.  */
204 7:              pop     {r5 - r8}
205                 cfi_adjust_cfa_offset (-16)
206                 cfi_restore (r5)
207                 cfi_restore (r6)
208                 cfi_restore (r7)
209                 cfi_restore (r8)
                /* Byte tail for the last 0-3 bytes.  r2 differs from the
                   remaining byte count by a multiple of 4, so its low two
                   bits match.  Shifting left by 31 leaves bit 0 as the only
                   result bit (Z clear: one byte, moved via r3) and moves
                   bit 1 into C (set: two more bytes, moved via r4 and ip).  */
211 8:              movs    r2, r2, lsl #31
212                 sfi_breg r1, \
213                 ldrbne  r3, [\B], #1
214                 sfi_breg r1, \
215                 ldrbcs  r4, [\B], #1
216                 sfi_breg r1, \
217                 ldrbcs  ip, [\B]
218                 sfi_breg r0, \
219                 strbne  r3, [\B], #1
220                 sfi_breg r0, \
221                 strbcs  r4, [\B], #1
222                 sfi_breg r0, \
223                 strbcs  ip, [\B]
                /* Return.  The destination pushed on entry comes back in r0.
                   The first variant restores lr and returns with bx lr; the
                   other pops the saved lr directly into pc.  */
225 #if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
226      || defined (ARM_ALWAYS_BX))
227                 pop     {r0, r4, lr}
228                 cfi_adjust_cfa_offset (-12)
229                 cfi_restore (r4)
230                 cfi_restore (lr)
231                 bx      lr
232 #else
233                 pop     {r0, r4, pc}
234 #endif
                /* The code below is only reached by branches taken before
                   r5-r8 were pushed, so go back to the unwind state that was
                   remembered just after the entry push.  */
236                 cfi_restore_state
                /* Destination not word aligned; ip = dest & 3 (1-3) on entry.
                   ip = 4 - ip is the number of bytes needed to reach a word
                   boundary.  After the cmp, gt (3 bytes) moves a byte via
                   r3, ge (2 or 3) one via r4, and the unconditional byte via
                   lr covers the remaining one.  */
238 9:              rsb     ip, ip, #4
239                 cmp     ip, #2
240                 sfi_breg r1, \
241                 ldrbgt  r3, [\B], #1
242                 sfi_breg r1, \
243                 ldrbge  r4, [\B], #1
244                 sfi_breg r1, \
245                 ldrb    lr, [\B], #1
246                 sfi_breg r0, \
247                 strbgt  r3, [\B], #1
248                 sfi_breg r0, \
249                 strbge  r4, [\B], #1
                /* Deduct the bytes just copied.  The strb leaves the flags
                   alone, so blt tests this subs: negative means fewer than 4
                   bytes remain, finish in the byte tail.  */
250                 subs    r2, r2, ip
251                 sfi_breg r0, \
252                 strb    lr, [\B], #1
253                 blt     8b
                /* Destination is now aligned.  If the source is aligned too,
                   use the block copy at 1; otherwise fall through to 10 with
                   ip = src & 3.  */
254                 ands    ip, r1, #3
255                 beq     1b
                /* Source is ip (1-3) bytes past a word boundary while the
                   destination is aligned.  Round r1 down to that boundary
                   and load the first word into lr.  The ldr leaves the flags
                   alone, so the branches test the cmp: ip == 2 goes to 17,
                   ip == 3 goes to 18, and ip == 1 falls through into the
                   code that follows.  */
257 10:             bic     r1, r1, #3
258                 cmp     ip, #2
259                 sfi_breg r1, \
260                 ldr     lr, [\B], #4
261                 beq     17f
262                 bgt     18f
/* forward_copy_shift pull, push

   Copy loop for a source that is not word aligned while the destination
   is.  On entry r1 has been rounded down to a word boundary and
   advanced past the first word, which is already in lr; r2 holds
   (bytes remaining - 4).  Each output word is the leftover part of the
   previous source word shifted by PULL #pull, OR-ed with the next
   source word shifted by PUSH #push.  PULL and PUSH are the
   endian-dependent shift operators defined near the top of this file.

   Numeric labels 11-16 are defined again by every expansion; the b/f
   suffixes select the nearest definition in the given direction.  */
265                 .macro  forward_copy_shift pull push
                /* r2 = bytes remaining - 32.  Negative: fewer than 32 bytes
                   left, skip the block loop.  */
267                 subs    r2, r2, #28
268                 blt     14f
                /* Optional cache-line alignment; CALGN() is defined in this
                   file to expand to nothing.  */
270         CALGN(  ands    ip, r1, #31             )
271         CALGN(  rsb     ip, ip, #32             )
272         CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
273         CALGN(  subcc   r2, r2, ip              )
274         CALGN(  bcc     15f                     )
                /* Save the extra registers the 32-byte loop needs.  */
276 11:             push    {r5 - r8, r10}
277                 cfi_adjust_cfa_offset (20)
278                 cfi_rel_offset (r5, 0)
279                 cfi_rel_offset (r6, 4)
280                 cfi_rel_offset (r7, 8)
281                 cfi_rel_offset (r8, 12)
282                 cfi_rel_offset (r10, 16)
                /* With PLD enabled, r2 is biased by a further 96, so the
                   loop entered at 12 issues preloads only while r2 stays
                   non-negative.  */
284         PLD(    sfi_pld r1, #0                  )
285         PLD(    subs    r2, r2, #96             )
286         PLD(    sfi_pld r1, #28                 )
287         PLD(    blt     13f                     )
288         PLD(    sfi_pld r1, #60                 )
289         PLD(    sfi_pld r1, #92                 )
                /* Block loop: load eight source words in two groups of four
                   and merge each with its neighbour.  r3 starts from the
                   word carried over in lr.  After the chain r3-r8, r10 and
                   ip hold the eight output words, and lr holds the last
                   source word loaded, carried into the next iteration.
                   Nothing after the subs writes the flags, so bge tests it.  */
291 12:     PLD(    sfi_pld r1, #124                )
292 13:             sfi_breg r1, \
293                 ldmia   \B!, {r4, r5, r6, r7}
294                 mov     r3, lr, PULL #\pull
295                 subs    r2, r2, #32
296                 sfi_breg r1, \
297                 ldmia   \B!, {r8, r10, ip, lr}
298                 orr     r3, r3, r4, PUSH #\push
299                 mov     r4, r4, PULL #\pull
300                 orr     r4, r4, r5, PUSH #\push
301                 mov     r5, r5, PULL #\pull
302                 orr     r5, r5, r6, PUSH #\push
303                 mov     r6, r6, PULL #\pull
304                 orr     r6, r6, r7, PUSH #\push
305                 mov     r7, r7, PULL #\pull
306                 orr     r7, r7, r8, PUSH #\push
307                 mov     r8, r8, PULL #\pull
308                 orr     r8, r8, r10, PUSH #\push
309                 mov     r10, r10, PULL #\pull
310                 orr     r10, r10, ip, PUSH #\push
311                 mov     ip, ip, PULL #\pull
312                 orr     ip, ip, lr, PUSH #\push
313                 sfi_breg r0, \
314                 stmia   \B!, {r3, r4, r5, r6, r7, r8, r10, ip}
315                 bge     12b
                /* PLD only: r2 + 96 >= 0 means whole 32-byte blocks remain;
                   copy them through 13, without the preload at 12.  */
316         PLD(    cmn     r2, #96                 )
317         PLD(    bge     13b                     )
                /* Block loop finished; release the extra registers.  */
319                 pop     {r5 - r8, r10}
320                 cfi_adjust_cfa_offset (-20)
321                 cfi_restore (r5)
322                 cfi_restore (r6)
323                 cfi_restore (r7)
324                 cfi_restore (r8)
325                 cfi_restore (r10)
                /* ip = byte size of the 0-7 whole words still to produce;
                   zero means go straight to the fix-up at 16.  */
327 14:             ands    ip, r2, #28
328                 beq     16f
                /* Word loop: one merged word per iteration, with ip counting
                   down by 4.  The orr and str leave the flags alone, so bgt
                   tests the subs.  */
330 15:             mov     r3, lr, PULL #\pull
331                 sfi_breg r1, \
332                 ldr     lr, [\B], #4
333                 subs    ip, ip, #4
334                 orr     r3, r3, lr, PUSH #\push
335                 sfi_breg r0, \
336                 str     r3, [\B], #4
337                 bgt     15b
338         CALGN(  cmp     r2, #0                  )
339         CALGN(  bge     11b                     )
                /* The last word fetched into lr still contains push / 8
                   source bytes that have not been stored.  Step r1 back by
                   that many so it addresses the next byte to copy, then
                   finish in the byte tail at label 8.  */
341 16:             sub     r1, r1, #(\push / 8)
342                 b       8b
344                 .endm
/* Three expansions of forward_copy_shift, one per source misalignment.
   The dispatch at label 10 falls through to the first when src & 3 is 1,
   branches to 17 when it is 2 and to 18 when it is 3.  In every case
   pull + push is 32.  */
347                 forward_copy_shift      pull=8  push=24
349 17:             forward_copy_shift      pull=16 push=16
351 18:             forward_copy_shift      pull=24 push=8
/* END and libc_hidden_builtin_def are defined outside this file.
   NOTE(review): presumably END closes the symbol opened by ENTRY and
   libc_hidden_builtin_def creates the internal hidden alias for memcpy;
   confirm against the glibc headers.  */
353 END(memcpy)
354 libc_hidden_builtin_def (memcpy)