Fixed wrong vector sincos/sincosf ABI to have it compatible with
[glibc.git] / sysdeps / arm / memmove.S
blob96b2366e9292193804e824d9555e9eb1ad235d14
1 /* Copyright (C) 2006-2016 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
4    Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library.  If not, see
18    <http://www.gnu.org/licenses/>.  */
20 /* Thumb requires excessive IT insns here.  */
21 #define NO_THUMB
22 #include <sysdep.h>
23 #include <arm-features.h>
/*
26  * Data preload for architectures that support it (ARM V5TE and above)
27  */
/* PLD(insn) emits insn only when none of the pre-V5TE architecture
   macros below is defined, i.e. only where the pld (preload)
   instruction exists; on older cores it expands to nothing.  */
28 #if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
29      && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
30      && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
31      && !defined (__ARM_ARCH_5T__))
32 #define PLD(code...)    code
33 #else
34 #define PLD(code...)
35 #endif
/*
38  * This can be used to enable code to cacheline align the source pointer.
39  * Experiments on tested architectures (StrongARM and XScale) didn't show
40  * this a worthwhile thing to do.  That might be different in the future.
41  */
/* CALGN(insn) is disabled by default and expands to nothing; swap the
   two definitions below to enable the cache-line-alignment paths.  */
42 //#define CALGN(code...)        code
43 #define CALGN(code...)
/*
46  * Endian independent macros for shifting bytes within registers.
47  */
/* On little-endian, PULL is lsr and PUSH is lsl; the pair is swapped
   for big-endian (__ARMEB__) so that code reassembling words with
   "PUSH #\push" / "PULL #\pull" works on either endianness.  */
48 #ifndef __ARMEB__
49 #define PULL            lsr
50 #define PUSH            lsl
51 #else
52 #define PULL            lsl
53 #define PUSH            lsr
54 #endif
56                 .text
57                 .syntax unified
/*
60  * Prototype: void *memmove(void *dest, const void *src, size_t n);
61  *
62  * Note:
63  *
64  * If the memory regions don't overlap, we simply branch to memcpy which is
65  * normally a bit faster. Otherwise the copy is done going downwards.
66  */
68 ENTRY(memmove)
@ void *memmove (void *dest /* r0 */, const void *src /* r1 */, size_t n /* r2 */)
@
@ Overlap test: ip = dest - src; the cmphi executes only when dest > src
@ (unsigned).  Branch to memcpy whenever dest <= src or n <= dest - src,
@ i.e. whenever a forward copy cannot clobber not-yet-read source bytes.
70                 subs    ip, r0, r1
71                 cmphi   r2, ip
72 #if !IS_IN (libc)
73                 bls     memcpy
74 #else
75                 bls     HIDDEN_JUMPTARGET(memcpy)
76 #endif
@ dest overlaps the top of src: copy backwards, from the end downwards.
@ r0 is saved so the original dest can be returned as the result.
78                 push    {r0, r4, lr}
79                 cfi_adjust_cfa_offset (12)
80                 cfi_rel_offset (r4, 4)
81                 cfi_rel_offset (lr, 8)
83                 cfi_remember_state
@ Point r1/r0 one past the end of each region; all accesses below use
@ pre-decrement addressing.
85                 add     r1, r1, r2
86                 add     r0, r0, r2
@ Fewer than 4 bytes total: go straight to the byte tail at 8:.
@ From here on r2 is kept biased by -4.
87                 subs    r2, r2, #4
88                 blt     8f
@ Align dest first (9f copies 1-3 bytes), then check src alignment (10f
@ selects a shifting copy).  NOTE(review): sfi_pld/sfi_breg come from the
@ sysdep headers; presumably they wrap the access for targets needing
@ sandboxed (SFI) address checks and expand to the plain access
@ elsewhere -- not visible in this file.
89                 ands    ip, r0, #3
90         PLD(    sfi_pld r1, #-4                 )
91                 bne     9f
92                 ands    ip, r1, #3
93                 bne     10f
@ Both pointers word-aligned.  Bias r2 by a further -28 (total -32) for
@ the 32-byte-per-iteration ldmdb/stmdb loop below.
95 1:              subs    r2, r2, #(28)
96                 push    {r5 - r8}
97                 cfi_adjust_cfa_offset (16)
98                 cfi_rel_offset (r5, 0)
99                 cfi_rel_offset (r6, 4)
100                 cfi_rel_offset (r7, 8)
101                 cfi_rel_offset (r8, 12)
102                 blt     5f
@ Optional (compiled out, see CALGN at top of file): cache-line align the
@ source by jumping part-way into the ldr ladder at 6:.
104         CALGN(  ands    ip, r1, #31             )
105         CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
106         CALGN(  bcs     2f                      )
107         CALGN(  adr     r4, 6f                  )
108         CALGN(  subs    r2, r2, ip              )  @ C is set here
109 #ifndef ARM_ALWAYS_BX
110         CALGN(  add     pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
111 #else
112         CALGN(  add     r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
113         CALGN(  bx      r4                      )
114 #endif
@ Prefetch ahead of the copy loop (only on cores that have pld).
116         PLD(    sfi_pld r1, #-4                 )
117 2:      PLD(    subs    r2, r2, #96             )
118         PLD(    sfi_pld r1, #-32                )
119         PLD(    blt     4f                      )
120         PLD(    sfi_pld r1, #-64                )
121         PLD(    sfi_pld r1, #-96                )
@ Main loop: move 8 words (32 bytes) per pass, descending.
123 3:      PLD(    sfi_pld r1, #-128               )
124 4:              sfi_breg r1, \
125                 ldmdb   \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
126                 subs    r2, r2, #32
127                 sfi_breg r0, \
128                 stmdb   \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
129                 bge     3b
130         PLD(    cmn     r2, #96                 )
131         PLD(    bge     4b                      )
@ Word tail of 0..28 bytes (ip = r2 & 28).  Jump into the ldr ladder so
@ exactly ip/4 of the seven loads (then the matching stores) execute.
@ Each slot is padded to 1 << ARM_BX_ALIGN_LOG2 bytes, hence the
@ ip << (ARM_BX_ALIGN_LOG2 - 2) offset; rsb inverts the count because
@ the ladder is entered from the top.
133 5:              ands    ip, r2, #28
134                 rsb     ip, ip, #32
135 #ifndef ARM_ALWAYS_BX
136                 /* C is always clear here.  */
137                 addne   pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
138                 b       7f
139 #else
140                 beq     7f
141                 push    {r10}
142                 cfi_adjust_cfa_offset (4)
143                 cfi_rel_offset (r10, 0)
144 0:              add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
145                 /* If alignment is not perfect, then there will be some
146                    padding (nop) instructions between this BX and label 6.
147                    The computation above assumed that two instructions
148                    later is exactly the right spot.  */
149                 add     r10, #(6f - (0b + PC_OFS))
150                 bx      r10
151 #endif
152                 .p2align ARM_BX_ALIGN_LOG2
153 6:              nop
154                 .p2align ARM_BX_ALIGN_LOG2
155                 sfi_breg r1, \
156                 ldr     r3, [\B, #-4]!
157                 .p2align ARM_BX_ALIGN_LOG2
158                 sfi_breg r1, \
159                 ldr     r4, [\B, #-4]!
160                 .p2align ARM_BX_ALIGN_LOG2
161                 sfi_breg r1, \
162                 ldr     r5, [\B, #-4]!
163                 .p2align ARM_BX_ALIGN_LOG2
164                 sfi_breg r1, \
165                 ldr     r6, [\B, #-4]!
166                 .p2align ARM_BX_ALIGN_LOG2
167                 sfi_breg r1, \
168                 ldr     r7, [\B, #-4]!
169                 .p2align ARM_BX_ALIGN_LOG2
170                 sfi_breg r1, \
171                 ldr     r8, [\B, #-4]!
172                 .p2align ARM_BX_ALIGN_LOG2
173                 sfi_breg r1, \
174                 ldr     lr, [\B, #-4]!
@ Same computed-jump trick for the matching store ladder at 66:.
176 #ifndef ARM_ALWAYS_BX
177                 add     pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
178                 nop
179 #else
180 0:              add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
181                 /* If alignment is not perfect, then there will be some
182                    padding (nop) instructions between this BX and label 66.
183                    The computation above assumed that two instructions
184                    later is exactly the right spot.  */
185                 add     r10, #(66f - (0b + PC_OFS))
186                 bx      r10
187 #endif
188                 .p2align ARM_BX_ALIGN_LOG2
189 66:             nop
190                 .p2align ARM_BX_ALIGN_LOG2
191                 sfi_breg r0, \
192                 str     r3, [\B, #-4]!
193                 .p2align ARM_BX_ALIGN_LOG2
194                 sfi_breg r0, \
195                 str     r4, [\B, #-4]!
196                 .p2align ARM_BX_ALIGN_LOG2
197                 sfi_breg r0, \
198                 str     r5, [\B, #-4]!
199                 .p2align ARM_BX_ALIGN_LOG2
200                 sfi_breg r0, \
201                 str     r6, [\B, #-4]!
202                 .p2align ARM_BX_ALIGN_LOG2
203                 sfi_breg r0, \
204                 str     r7, [\B, #-4]!
205                 .p2align ARM_BX_ALIGN_LOG2
206                 sfi_breg r0, \
207                 str     r8, [\B, #-4]!
208                 .p2align ARM_BX_ALIGN_LOG2
209                 sfi_breg r0, \
210                 str     lr, [\B, #-4]!
212 #ifdef ARM_ALWAYS_BX
213                 pop     {r10}
214                 cfi_adjust_cfa_offset (-4)
215                 cfi_restore (r10)
216 #endif
218         CALGN(  bcs     2b                      )
219 @ Word copying done; restore the bulk-loop registers.
220 7:              pop     {r5 - r8}
221                 cfi_adjust_cfa_offset (-16)
222                 cfi_restore (r5)
223                 cfi_restore (r6)
224                 cfi_restore (r7)
225                 cfi_restore (r8)
@ Final 0-3 bytes.  lsl #31 moves bit 1 of the remaining count into C
@ and bit 0 into N, so the ne (N!=V after movs -> N set) and cs forms
@ below copy 2 bytes and/or 1 byte as needed.
227 8:              movs    r2, r2, lsl #31
228                 sfi_breg r1, \
229                 ldrbne  r3, [\B, #-1]!
230                 sfi_breg r1, \
231                 ldrbcs  r4, [\B, #-1]!
232                 sfi_breg r1, \
233                 ldrbcs  ip, [\B, #-1]
234                 sfi_breg r0, \
235                 strbne  r3, [\B, #-1]!
236                 sfi_breg r0, \
237                 strbcs  r4, [\B, #-1]!
238                 sfi_breg r0, \
239                 strbcs  ip, [\B, #-1]
@ Return: restore r0 (original dest) and return via pop into pc, unless
@ interworking/BX rules require an explicit bx lr.
241 #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
242      || defined (ARM_ALWAYS_BX))
243                 pop     {r0, r4, lr}
244                 cfi_adjust_cfa_offset (-12)
245                 cfi_restore (r4)
246                 cfi_restore (lr)
247                 bx      lr
248 #else
249                 pop     {r0, r4, pc}
250 #endif
@ Unwind info for the code below reverts to the state captured by
@ cfi_remember_state above (only {r0, r4, lr} on the stack).
252                 cfi_restore_state
@ Dest unaligned: ip = dest & 3.  "cmp ip, #2" makes gt true for ip==3
@ and ge true for ip>=2, so 1, 2 or 3 bytes are copied to word-align r0,
@ then the count is re-checked and the aligned path retried.
254 9:              cmp     ip, #2
255                 sfi_breg r1, \
256                 ldrbgt  r3, [\B, #-1]!
257                 sfi_breg r1, \
258                 ldrbge  r4, [\B, #-1]!
259                 sfi_breg r1, \
260                 ldrb    lr, [\B, #-1]!
261                 sfi_breg r0, \
262                 strbgt  r3, [\B, #-1]!
263                 sfi_breg r0, \
264                 strbge  r4, [\B, #-1]!
265                 subs    r2, r2, ip
266                 sfi_breg r0, \
267                 strb    lr, [\B, #-1]!
268                 blt     8b
269                 ands    ip, r1, #3
270                 beq     1b
@ Dest aligned but src is not (ip = src & 3).  Round r1 down to a word
@ boundary, preload the straddling word into r3, and dispatch on ip:
@ ip==2 -> 17f (16/16), ip==1 -> 18f (24/8), ip==3 falls through (8/24).
272 10:             bic     r1, r1, #3
273                 cmp     ip, #2
274                 sfi_breg r1, \
275                 ldr     r3, [\B, #0]
276                 beq     17f
277                 blt     18f
@ Backward copy for a src that is displaced \push/8 bytes from a word
@ boundary.  r3 always holds the previously loaded source word; each
@ output word is merged from two adjacent source words using the
@ endian-independent PUSH/PULL shifts (\push + \pull == 32).
280                 .macro  backward_copy_shift push pull
282                 subs    r2, r2, #28
283                 blt     14f
@ Optional (compiled out) cache-line alignment, as in the aligned path.
285         CALGN(  ands    ip, r1, #31             )
286         CALGN(  rsb     ip, ip, #32             )
287         CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
288         CALGN(  subcc   r2, r2, ip              )
289         CALGN(  bcc     15f                     )
291 11:             push    {r5 - r8, r10}
292                 cfi_adjust_cfa_offset (20)
293                 cfi_rel_offset (r5, 0)
294                 cfi_rel_offset (r6, 4)
295                 cfi_rel_offset (r7, 8)
296                 cfi_rel_offset (r8, 12)
297                 cfi_rel_offset (r10, 16)
299         PLD(    sfi_pld r1, #-4                 )
300         PLD(    subs    r2, r2, #96             )
301         PLD(    sfi_pld r1, #-32                )
302         PLD(    blt     13f                     )
303         PLD(    sfi_pld r1, #-64                )
304         PLD(    sfi_pld r1, #-96                )
@ Main shifting loop: read 8 source words, emit 8 merged words (32
@ bytes) per pass, working downwards.
306 12:     PLD(    sfi_pld r1, #-128               )
307 13:             sfi_breg r1, \
308                 ldmdb   \B!, {r7, r8, r10, ip}
309                 mov     lr, r3, PUSH #\push
310                 subs    r2, r2, #32
311                 sfi_breg r1, \
312                 ldmdb   \B!, {r3, r4, r5, r6}
313                 orr     lr, lr, ip, PULL #\pull
314                 mov     ip, ip, PUSH #\push
315                 orr     ip, ip, r10, PULL #\pull
316                 mov     r10, r10, PUSH #\push
317                 orr     r10, r10, r8, PULL #\pull
318                 mov     r8, r8, PUSH #\push
319                 orr     r8, r8, r7, PULL #\pull
320                 mov     r7, r7, PUSH #\push
321                 orr     r7, r7, r6, PULL #\pull
322                 mov     r6, r6, PUSH #\push
323                 orr     r6, r6, r5, PULL #\pull
324                 mov     r5, r5, PUSH #\push
325                 orr     r5, r5, r4, PULL #\pull
326                 mov     r4, r4, PUSH #\push
327                 orr     r4, r4, r3, PULL #\pull
328                 sfi_breg r0, \
329                 stmdb   \B!, {r4 - r8, r10, ip, lr}
330                 bge     12b
331         PLD(    cmn     r2, #96                 )
332         PLD(    bge     13b                     )
334                 pop     {r5 - r8, r10}
335                 cfi_adjust_cfa_offset (-20)
336                 cfi_restore (r5)
337                 cfi_restore (r6)
338                 cfi_restore (r7)
339                 cfi_restore (r8)
340                 cfi_restore (r10)
@ Word tail: 0..28 remaining whole words, one merged word per pass.
342 14:             ands    ip, r2, #28
343                 beq     16f
345 15:             mov     lr, r3, PUSH #\push
346                 sfi_breg r1, \
347                 ldr     r3, [\B, #-4]!
348                 subs    ip, ip, #4
349                 orr     lr, lr, r3, PULL #\pull
350                 sfi_breg r0, \
351                 str     lr, [\B, #-4]!
352                 bgt     15b
353         CALGN(  cmp     r2, #0                  )
354         CALGN(  bge     11b                     )
@ Undo the bic rounding: move r1 back onto the true (unaligned) source
@ byte position before handing off to the byte tail at 8:.
356 16:             add     r1, r1, #(\pull / 8)
357                 b       8b
359                 .endm
@ Instantiations, selected by the src & 3 dispatch at 10: above.
362                 backward_copy_shift     push=8  pull=24         @ src & 3 == 3
364 17:             backward_copy_shift     push=16 pull=16         @ src & 3 == 2
366 18:             backward_copy_shift     push=24 pull=8          @ src & 3 == 1
369 END(memmove)
370 libc_hidden_builtin_def (memmove)