Update copyright notices with scripts/update-copyrights
[glibc.git] / ports / sysdeps / arm / memcpy.S
blob3e985dad68dfaf72b8b805118dde407f909ec19c
1 /* Copyright (C) 2006-2014 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
4    Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library.  If not, see
18    <http://www.gnu.org/licenses/>.  */
20 /* Thumb requires excessive IT insns here.  */
21 #define NO_THUMB
22 #include <sysdep.h>
23 #include <arm-features.h>
25 /*
26  * Data preload for architectures that support it (ARM V5TE and above)
27  */
28 #if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
29      && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
30      && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
31      && !defined (__ARM_ARCH_5T__))
/* None of the architecture macros tested above is defined: PLD(x) emits x,
   so the preload instructions wrapped in PLD(...) are assembled.  */
32 #define PLD(code...)    code
33 #else
/* One of the architectures tested above is being targeted: PLD(x) expands to
   nothing and every PLD(...) line disappears from the output.  */
34 #define PLD(code...)
35 #endif
37 /*
38  * This can be used to enable code to cacheline align the source pointer.
39  * Experiments on tested architectures (StrongARM and XScale) didn't show
40  * this to be a worthwhile thing to do.  That might be different in the future.
41  */
42 //#define CALGN(code...)        code
/* Active definition: CALGN(x) expands to nothing, so every CALGN(...) line
   in this file is compiled out.  The commented-out definition on the
   previous line is the variant that would emit x instead.  */
43 #define CALGN(code...)
45 /*
46  * Endian independent macros for shifting bytes within registers.
47  */
48 #ifndef __ARMEB__
/* __ARMEB__ not defined (little-endian target): PULL is a logical shift
   right and PUSH a logical shift left.  */
49 #define PULL            lsr
50 #define PUSH            lsl
51 #else
/* __ARMEB__ defined (big-endian target): the two shift directions swap.  */
52 #define PULL            lsl
53 #define PUSH            lsr
54 #endif
56                 .text
57                 .syntax unified
59 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
/* memcpy -- forward (ascending-address) copy of r2 bytes from [r1] to [r0].

   Register roles as used by the code below:
     r0  destination pointer, advanced by every store.  Its entry value is
         pushed first thing and popped back on exit, so the function
         returns the original dest.
     r1  source pointer, advanced by every load.
     r2  byte count, kept *biased*: constants (4, 28, 96) are subtracted up
         front so that the loop tests are plain sign checks.  Every bias is
         a multiple of 4, and once a word-copy path has been entered the
         total bias is a multiple of 32, so the bits examined later
         (r2 & 28, and bits 0-1 in the byte tail) still equal those of the
         true remaining count.
     r3-r8, r10, ip, lr  data scratch.  r4-r8, r10 and lr are saved on the
         stack before being overwritten; r3 and ip are used unsaved.

   sfi_breg / sfi_pld are macros defined outside this file.
   NOTE(review): the "B" placeholder on the line following each sfi_breg
   presumably stands for the base register named in that sfi_breg
   invocation -- confirm against the included headers.  */
61 ENTRY(memcpy)
                /* Save dest (r0) for the return value, plus r4 and lr, which
                   are used as data registers below.  The cfi_* lines describe
                   the 12-byte stack adjustment and the saved slots.  */
63                 push    {r0, r4, lr}
64                 cfi_adjust_cfa_offset (12)
65                 cfi_rel_offset (r4, 4)
66                 cfi_rel_offset (lr, 8)
                /* Remember this CFI state; it is reinstated after the
                   epilogue for the code that follows it.  */
68                 cfi_remember_state
                /* r2 = n - 4.  Fewer than 4 bytes: go straight to the byte
                   tail at 8.  */
70                 subs    r2, r2, #4
71                 blt     8f
                /* dest not word aligned -> 9, with ip = dest & 3.  */
72                 ands    ip, r0, #3
73         PLD(    sfi_pld r1, #0                  )
74                 bne     9f
                /* dest aligned but src not -> 10, with ip = src & 3.  */
75                 ands    ip, r1, #3
76                 bne     10f
                /* Both pointers word aligned.  r2 = remaining - 32.  r5-r8 are
                   pushed before the branch, so both the bulk loop and the
                   code at 5 run with them saved (they are popped at 7).  */
78 1:              subs    r2, r2, #(28)
79                 push    {r5 - r8}
80                 cfi_adjust_cfa_offset (16)
81                 cfi_rel_offset (r5, 0)
82                 cfi_rel_offset (r6, 4)
83                 cfi_rel_offset (r7, 8)
84                 cfi_rel_offset (r8, 12)
85                 blt     5f
                /* Source cache-line alignment; compiled out for as long as
                   CALGN is defined to expand to nothing.  */
87         CALGN(  ands    ip, r1, #31             )
88         CALGN(  rsb     r3, ip, #32             )
89         CALGN(  sbcsne  r4, r3, r2              )  @ C is always set here
90         CALGN(  bcs     2f                      )
91         CALGN(  adr     r4, 6f                  )
92         CALGN(  subs    r2, r2, r3              )  @ C gets set
93 #ifndef ARM_ALWAYS_BX
94         CALGN(  add     pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
95 #else
96         CALGN(  add     r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
97         CALGN(  bx      r4                      )
98 #endif
                /* Bulk loop: 32 bytes (eight registers) per iteration.  When
                   PLD is active r2 is biased by a further 96: the loop
                   branches back to 3 (issuing a preload at src + 124) while
                   r2 stays non-negative, then keeps iterating from 4, without
                   that preload, while r2 + 96 is still non-negative.  When
                   PLD is empty only the ldmia/subs/stmia/bge lines remain.  */
100         PLD(    sfi_pld r1, #0                  )
101 2:      PLD(    subs    r2, r2, #96             )
102         PLD(    sfi_pld r1, #28                 )
103         PLD(    blt     4f                      )
104         PLD(    sfi_pld r1, #60                 )
105         PLD(    sfi_pld r1, #92                 )
107 3:      PLD(    sfi_pld r1, #124                )
108 4:              sfi_breg r1, \
109                 ldmia   \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
110                 subs    r2, r2, #32
111                 sfi_breg r0, \
112                 stmia   \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
113                 bge     3b
114         PLD(    cmn     r2, #96                 )
115         PLD(    bge     4b                      )
                /* Fewer than 32 bytes left.  ip = 32 - (bytes left in whole
                   words, 0..28).  The Z flag comes from ands (rsb does not
                   set flags): if no whole word is left, go to 7.  Otherwise
                   jump into the load table below, skipping ip / 4 slots so
                   that only the last (remaining words) loads execute.  Each
                   slot is aligned to 1 << ARM_BX_ALIGN_LOG2 bytes, hence the
                   shift amount.  In ARM state pc reads as the address of the
                   current instruction + 8.  */
117 5:              ands    ip, r2, #28
118                 rsb     ip, ip, #32
119 #ifndef ARM_ALWAYS_BX
120                 /* C is always clear here.  */
121                 addne   pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
122                 b       7f
123 #else
                /* bx-only variant: the target is computed in r10, which is
                   saved here and restored after the store table.  */
124                 beq     7f
125                 push    {r10}
126                 cfi_adjust_cfa_offset (4)
127                 cfi_rel_offset (r10, 0)
128                 add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
129                 bx      r10
130 #endif
                /* Load table: slot 0 is a nop, slots 1-7 load one word each
                   into r3, r4, r5, r6, r7, r8, lr.  */
131                 .p2align ARM_BX_ALIGN_LOG2
132 6:              nop
133                 .p2align ARM_BX_ALIGN_LOG2
134                 sfi_breg r1, \
135                 ldr     r3, [\B], #4
136                 .p2align ARM_BX_ALIGN_LOG2
137                 sfi_breg r1, \
138                 ldr     r4, [\B], #4
139                 .p2align ARM_BX_ALIGN_LOG2
140                 sfi_breg r1, \
141                 ldr     r5, [\B], #4
142                 .p2align ARM_BX_ALIGN_LOG2
143                 sfi_breg r1, \
144                 ldr     r6, [\B], #4
145                 .p2align ARM_BX_ALIGN_LOG2
146                 sfi_breg r1, \
147                 ldr     r7, [\B], #4
148                 .p2align ARM_BX_ALIGN_LOG2
149                 sfi_breg r1, \
150                 ldr     r8, [\B], #4
151                 .p2align ARM_BX_ALIGN_LOG2
152                 sfi_breg r1, \
153                 ldr     lr, [\B], #4
                /* Same computed jump, now into the matching store table; ip
                   has not been modified by the loads above.  */
155 #ifndef ARM_ALWAYS_BX
156                 add     pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
157                 nop
158 #else
159                 add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
160                 bx      r10
161 #endif
                /* Store table: slot 0 is a nop, slots 1-7 store r3, r4, r5,
                   r6, r7, r8, lr in the same order as they were loaded.  */
162                 .p2align ARM_BX_ALIGN_LOG2
163                 nop
164                 .p2align ARM_BX_ALIGN_LOG2
165                 sfi_breg r0, \
166                 str     r3, [\B], #4
167                 .p2align ARM_BX_ALIGN_LOG2
168                 sfi_breg r0, \
169                 str     r4, [\B], #4
170                 .p2align ARM_BX_ALIGN_LOG2
171                 sfi_breg r0, \
172                 str     r5, [\B], #4
173                 .p2align ARM_BX_ALIGN_LOG2
174                 sfi_breg r0, \
175                 str     r6, [\B], #4
176                 .p2align ARM_BX_ALIGN_LOG2
177                 sfi_breg r0, \
178                 str     r7, [\B], #4
179                 .p2align ARM_BX_ALIGN_LOG2
180                 sfi_breg r0, \
181                 str     r8, [\B], #4
182                 .p2align ARM_BX_ALIGN_LOG2
183                 sfi_breg r0, \
184                 str     lr, [\B], #4
186 #ifdef ARM_ALWAYS_BX
                /* Undo the r10 save made before the first computed jump.  */
187                 pop     {r10}
188                 cfi_adjust_cfa_offset (-4)
189                 cfi_restore (r10)
190 #endif
192         CALGN(  bcs     2b                      )
                /* Word copying finished: restore r5-r8.  */
194 7:              pop     {r5 - r8}
195                 cfi_adjust_cfa_offset (-16)
196                 cfi_restore (r5)
197                 cfi_restore (r6)
198                 cfi_restore (r7)
199                 cfi_restore (r8)
                /* Byte tail: 0-3 bytes left, encoded in bits 0-1 of r2.  The
                   shift by 31 leaves bit 0 in bit 31 of the result (non-zero
                   result -> NE) and moves bit 1 into the carry (CS).
                   NE copies one byte, CS copies two bytes.  All loads are
                   issued before the stores.  */
201 8:              movs    r2, r2, lsl #31
202                 sfi_breg r1, \
203                 ldrbne  r3, [\B], #1
204                 sfi_breg r1, \
205                 ldrbcs  r4, [\B], #1
206                 sfi_breg r1, \
207                 ldrbcs  ip, [\B]
208                 sfi_breg r0, \
209                 strbne  r3, [\B], #1
210                 sfi_breg r0, \
211                 strbcs  r4, [\B], #1
212                 sfi_breg r0, \
213                 strbcs  ip, [\B]
                /* Return.  r0 gets the saved dest back in both variants.  The
                   first variant restores lr and returns with bx lr; the
                   second pops the saved lr directly into pc.  */
215 #if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
216      || defined (ARM_ALWAYS_BX))
217                 pop     {r0, r4, lr}
218                 cfi_adjust_cfa_offset (-12)
219                 cfi_restore (r4)
220                 cfi_restore (lr)
221                 bx      lr
222 #else
223                 pop     {r0, r4, pc}
224 #endif
                /* The code below is reached only by branches taken while
                   r0/r4/lr are still on the stack: reinstate the CFI state
                   remembered right after the entry push.  */
226                 cfi_restore_state
                /* dest misaligned: ip = 4 - (dest & 3) = 1..3 bytes needed to
                   word-align it.  GT (ip == 3) and GE (ip >= 2) select the
                   optional bytes; the transfer through lr is unconditional.
                   r2 -= ip; if that goes negative fewer than 4 bytes remain,
                   so finish in the byte tail.  */
228 9:              rsb     ip, ip, #4
229                 cmp     ip, #2
230                 sfi_breg r1, \
231                 ldrbgt  r3, [\B], #1
232                 sfi_breg r1, \
233                 ldrbge  r4, [\B], #1
234                 sfi_breg r1, \
235                 ldrb    lr, [\B], #1
236                 sfi_breg r0, \
237                 strbgt  r3, [\B], #1
238                 sfi_breg r0, \
239                 strbge  r4, [\B], #1
240                 subs    r2, r2, ip
241                 sfi_breg r0, \
242                 strb    lr, [\B], #1
243                 blt     8b
                /* dest is now aligned.  If src is aligned as well, use the
                   plain word copy at 1; otherwise fall through with
                   ip = src & 3.  */
244                 ands    ip, r1, #3
245                 beq     1b
                /* src is misaligned by ip (1..3) while dest is word aligned.
                   Round r1 down to a word boundary and prime lr with the
                   first aligned source word.  The flags tested by beq/bgt are
                   those of the cmp (the ldr in between does not set flags):
                   ip == 1 falls through to the pull=8 expansion,
                   ip == 2 -> 17, ip == 3 -> 18.  */
247 10:             bic     r1, r1, #3
248                 cmp     ip, #2
249                 sfi_breg r1, \
250                 ldr     lr, [\B], #4
251                 beq     17f
252                 bgt     18f
                /* forward_copy_shift pull push
                   Copy from a word-misaligned source to a word-aligned
                   destination.  On entry lr holds the most recently loaded
                   aligned source word and r1 points just past it.  Every
                   stored word is
                       (previous word PULL #pull) | (next word PUSH #push)
                   and pull + push = 32 in all three expansions below.
                   Always ends by branching to the byte tail at 8.  */
255                 .macro  forward_copy_shift pull push
                /* r2 = remaining - 32; fewer than 32 bytes -> 14.  */
257                 subs    r2, r2, #28
258                 blt     14f
260         CALGN(  ands    ip, r1, #31             )
261         CALGN(  rsb     ip, ip, #32             )
262         CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
263         CALGN(  subcc   r2, r2, ip              )
264         CALGN(  bcc     15f                     )
                /* The bulk loop uses r5-r8 and r10 as extra data registers.  */
266 11:             push    {r5 - r8, r10}
267                 cfi_adjust_cfa_offset (20)
268                 cfi_rel_offset (r5, 0)
269                 cfi_rel_offset (r6, 4)
270                 cfi_rel_offset (r7, 8)
271                 cfi_rel_offset (r8, 12)
272                 cfi_rel_offset (r10, 16)
                /* Preload handling mirrors the aligned bulk loop: extra bias
                   of 96 on r2, loop via 12 with a preload, then via 13
                   without it while r2 + 96 is non-negative.  */
274         PLD(    sfi_pld r1, #0                  )
275         PLD(    subs    r2, r2, #96             )
276         PLD(    sfi_pld r1, #28                 )
277         PLD(    blt     13f                     )
278         PLD(    sfi_pld r1, #60                 )
279         PLD(    sfi_pld r1, #92                 )
                /* 32 bytes per iteration: eight new source words are loaded
                   in two batches of four, each output register is merged
                   from two neighbouring source words, and eight words are
                   stored.  lr ends up holding the last loaded word, ready to
                   be the "previous word" of the next iteration.  */
281 12:     PLD(    sfi_pld r1, #124                )
282 13:             sfi_breg r1, \
283                 ldmia   \B!, {r4, r5, r6, r7}
284                 mov     r3, lr, PULL #\pull
285                 subs    r2, r2, #32
286                 sfi_breg r1, \
287                 ldmia   \B!, {r8, r10, ip, lr}
288                 orr     r3, r3, r4, PUSH #\push
289                 mov     r4, r4, PULL #\pull
290                 orr     r4, r4, r5, PUSH #\push
291                 mov     r5, r5, PULL #\pull
292                 orr     r5, r5, r6, PUSH #\push
293                 mov     r6, r6, PULL #\pull
294                 orr     r6, r6, r7, PUSH #\push
295                 mov     r7, r7, PULL #\pull
296                 orr     r7, r7, r8, PUSH #\push
297                 mov     r8, r8, PULL #\pull
298                 orr     r8, r8, r10, PUSH #\push
299                 mov     r10, r10, PULL #\pull
300                 orr     r10, r10, ip, PUSH #\push
301                 mov     ip, ip, PULL #\pull
302                 orr     ip, ip, lr, PUSH #\push
303                 sfi_breg r0, \
304                 stmia   \B!, {r3, r4, r5, r6, r7, r8, r10, ip}
305                 bge     12b
306         PLD(    cmn     r2, #96                 )
307         PLD(    bge     13b                     )
309                 pop     {r5 - r8, r10}
310                 cfi_adjust_cfa_offset (-20)
311                 cfi_restore (r5)
312                 cfi_restore (r6)
313                 cfi_restore (r7)
314                 cfi_restore (r8)
315                 cfi_restore (r10)
                /* ip = bytes still to copy as whole words (0..28);
                   none -> 16.  */
317 14:             ands    ip, r2, #28
318                 beq     16f
                /* One word per iteration, using the same merge as the bulk
                   loop; ip counts down by 4 until it reaches zero.  */
320 15:             mov     r3, lr, PULL #\pull
321                 sfi_breg r1, \
322                 ldr     lr, [\B], #4
323                 subs    ip, ip, #4
324                 orr     r3, r3, lr, PUSH #\push
325                 sfi_breg r0, \
326                 str     r3, [\B], #4
327                 bgt     15b
328         CALGN(  cmp     r2, #0                  )
329         CALGN(  bge     11b                     )
                /* lr still contains push / 8 source bytes that have not been
                   stored.  Step r1 back by that many bytes so the byte tail
                   at 8 re-reads them from the true source position.  */
331 16:             sub     r1, r1, #(\push / 8)
332                 b       8b
334                 .endm
                /* src & 3 == 1 (fall-through from label 10).  */
337                 forward_copy_shift      pull=8  push=24
                /* src & 3 == 2.  */
339 17:             forward_copy_shift      pull=16 push=16
                /* src & 3 == 3.  */
341 18:             forward_copy_shift      pull=24 push=8
343 END(memcpy)
344 libc_hidden_builtin_def (memcpy)