sysdeps/arm/memmove.S

   1 /* Copyright (C) 2006-2023 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library.  If not, see
  17    <https://www.gnu.org/licenses/>.  */
  18
  19 /* Thumb requires excessive IT insns here.  */
  20 #define NO_THUMB
  21 #include <sysdep.h>
  22 #include <arm-features.h>
  23
  24 /*
  25  * Data preload for architectures that support it (ARM V5TE and above)
  26  */
  27 #if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
  28      && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
  29      && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
  30      && !defined (__ARM_ARCH_5T__))
  31 #define PLD(code...)    code    /* None of the listed architectures: emit the wrapped instruction.  */
  32 #else
  33 #define PLD(code...)            /* One of the listed architectures: the wrapped instruction is dropped.  */
  34 #endif
  35
  36 /*
  37  * This can be used to enable code to cacheline align the source pointer.
  38  * Experiments on tested architectures (StrongARM and XScale) didn't show
  39  * this a worthwhile thing to do.  That might be different in the future.
  40  */
  41 //#define CALGN(code...)        code
  42 #define CALGN(code...)          /* Active definition: every CALGN(...) line expands to nothing.  */
  43
  44 /*
  45  * Endian independent macros for shifting bytes within registers.
  46  */
  47 #ifndef __ARMEB__               /* Without __ARMEB__: PULL is a right shift, PUSH a left shift.  */
  48 #define PULL            lsr
  49 #define PUSH            lsl
  50 #else                           /* With __ARMEB__ the two shift directions are swapped.  */
  51 #define PULL            lsl
  52 #define PUSH            lsr
  53 #endif
  54
  55                 .text
  56                 .syntax unified
  57
  58 /*
  59  * Prototype: void *memmove(void *dest, const void *src, size_t n);
  60  *
  61  * Note:
  62  *
  63  * If dest <= src, or the regions do not overlap (n <= dest - src), we branch
  64  * to memcpy, which is normally a bit faster. Otherwise we copy downwards.
  65  */
  66
  67 ENTRY(memmove)
  68                 /* r0 = dest, r1 = src, r2 = n.  Overlapping copies with dest above src run from high to low addresses.  */
  69                 subs    ip, r0, r1              /* ip = dest - src, setting the flags.  */
  70                 cmphi   r2, ip                  /* Only if dest > src (unsigned): compare n with dest - src.  */
  71 #if !IS_IN (libc)
  72                 bls     memcpy                  /* dest <= src or n <= dest - src: let memcpy do the copy.  */
  73 #else
  74                 bls     HIDDEN_JUMPTARGET(memcpy)       /* Same test, using the HIDDEN_JUMPTARGET form of the symbol.  */
  75 #endif
  76
  77                 push    {r0, r4, lr}            /* Save dest (reloaded into r0 at exit), r4 and lr.  */
  78                 cfi_adjust_cfa_offset (12)
  79                 cfi_rel_offset (r4, 4)
  80                 cfi_rel_offset (lr, 8)
  81
  82                 cfi_remember_state
  83
  84                 add     r1, r1, r2              /* r1 = src + n, one past the last source byte.  */
  85                 add     r0, r0, r2              /* r0 = dest + n; the copy below walks both pointers down.  */
  86                 subs    r2, r2, #4              /* r2 = n - 4; a borrow means fewer than 4 bytes.  */
  87                 blo     8f                      /* n < 4: only the byte tail is needed.  */
  88                 ands    ip, r0, #3              /* ip = misalignment of the destination end.  */
  89         PLD(    pld     [r1, #-4]               )
  90                 bne     9f                      /* Destination not word aligned: fix that first.  */
  91                 ands    ip, r1, #3              /* ip = misalignment of the source end.  */
  92                 bne     10f                     /* Source not word aligned: use the shifting copy.  */
  93                 /* Both pointers are word aligned from here.  */
  94 1:              subs    r2, r2, #(28)           /* r2 is now biased by -32; a borrow means < 32 bytes left.  */
  95                 push    {r5 - r8}               /* Extra registers for the 8-word block copy.  */
  96                 cfi_adjust_cfa_offset (16)
  97                 cfi_rel_offset (r5, 0)
  98                 cfi_rel_offset (r6, 4)
  99                 cfi_rel_offset (r7, 8)
 100                 cfi_rel_offset (r8, 12)
 101                 blo     5f                      /* Fewer than 32 bytes: skip the block loop.  */
 102                 /* CALGN is defined empty in this file, so the CALGN lines below expand to nothing.  */
 103         CALGN(  ands    ip, r1, #31             )
 104         CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
 105         CALGN(  bcs     2f                      )
 106         CALGN(  adr     r4, 6f                  )
 107         CALGN(  subs    r2, r2, ip              )  @ C is set here
 108 #ifndef ARM_ALWAYS_BX
 109         CALGN(  add     pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
 110 #else
 111         CALGN(  add     r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
 112         CALGN(  bx      r4                      )
 113 #endif
 114                 /* Preload below the source pointer ahead of the descending copy (PLD may expand to nothing).  */
 115         PLD(    pld     [r1, #-4]               )
 116 2:      PLD(    cmp     r2, #96                 )
 117         PLD(    pld     [r1, #-32]              )
 118         PLD(    blo     4f                      )
 119         PLD(    pld     [r1, #-64]              )
 120         PLD(    pld     [r1, #-96]              )
 121                 /* Main loop: move 32 bytes per iteration.  */
 122 3:      PLD(    pld     [r1, #-128]             )
 123 4:              ldmdb   r1!, {r3, r4, r5, r6, r7, r8, ip, lr}   /* Load 8 words below r1; r1 moves down 32.  */
 124                 subs    r2, r2, #32             /* Flags from here decide the loop branch below.  */
 125                 stmdb   r0!, {r3, r4, r5, r6, r7, r8, ip, lr}   /* Store them below r0; r0 moves down 32.  */
 126                 bhs     3b                      /* Loop while at least another 32 bytes remain.  */
 127                 /* Fewer than 32 bytes remain: copy the leftover whole words.  */
 128 5:              ands    ip, r2, #28             /* ip = bytes left in whole words (0..28); Z set if none.  */
 129                 rsb     ip, ip, #32             /* ip = 32 - ip; flags from the ands are left untouched.  */
 130 #ifndef ARM_ALWAYS_BX
 131                 /* C is always clear here.  */
 132                 addne   pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)        /* Skip ip / 4 slots of the table at 6 so only the needed loads run.  */
 133                 b       7f                      /* No whole words left.  */
 134 #else
 135                 beq     7f                      /* No whole words left.  */
 136                 push    {r10}                   /* r10 holds the computed branch target.  */
 137                 cfi_adjust_cfa_offset (4)
 138                 cfi_rel_offset (r10, 0)
 139 0:              add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 140                 /* If alignment is not perfect, then there will be some
 141                    padding (nop) instructions between this BX and label 6.
 142                    The computation above assumed that two instructions
 143                    later is exactly the right spot.  */
 144                 add     r10, #(6f - (0b + PC_OFS))
 145                 bx      r10
 146 #endif
 147                 .p2align ARM_BX_ALIGN_LOG2
 148 6:              nop                             /* Slot 0 of the load table; every slot is aligned to 1 << ARM_BX_ALIGN_LOG2 bytes.  */
 149                 .p2align ARM_BX_ALIGN_LOG2
 150                 ldr     r3, [r1, #-4]!
 151                 .p2align ARM_BX_ALIGN_LOG2
 152                 ldr     r4, [r1, #-4]!
 153                 .p2align ARM_BX_ALIGN_LOG2
 154                 ldr     r5, [r1, #-4]!
 155                 .p2align ARM_BX_ALIGN_LOG2
 156                 ldr     r6, [r1, #-4]!
 157                 .p2align ARM_BX_ALIGN_LOG2
 158                 ldr     r7, [r1, #-4]!
 159                 .p2align ARM_BX_ALIGN_LOG2
 160                 ldr     r8, [r1, #-4]!
 161                 .p2align ARM_BX_ALIGN_LOG2
 162                 ldr     lr, [r1, #-4]!
 163                 /* ip is unchanged, so the same offset selects the matching entry of the store table.  */
 164 #ifndef ARM_ALWAYS_BX
 165                 add     pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 166                 nop
 167 #else
 168 0:              add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 169                 /* If alignment is not perfect, then there will be some
 170                    padding (nop) instructions between this BX and label 66.
 171                    The computation above assumed that two instructions
 172                    later is exactly the right spot.  */
 173                 add     r10, #(66f - (0b + PC_OFS))
 174                 bx      r10
 175 #endif
 176                 .p2align ARM_BX_ALIGN_LOG2
 177 66:             nop                             /* Slot 0 of the store table, laid out like the load table.  */
 178                 .p2align ARM_BX_ALIGN_LOG2
 179                 str     r3, [r0, #-4]!
 180                 .p2align ARM_BX_ALIGN_LOG2
 181                 str     r4, [r0, #-4]!
 182                 .p2align ARM_BX_ALIGN_LOG2
 183                 str     r5, [r0, #-4]!
 184                 .p2align ARM_BX_ALIGN_LOG2
 185                 str     r6, [r0, #-4]!
 186                 .p2align ARM_BX_ALIGN_LOG2
 187                 str     r7, [r0, #-4]!
 188                 .p2align ARM_BX_ALIGN_LOG2
 189                 str     r8, [r0, #-4]!
 190                 .p2align ARM_BX_ALIGN_LOG2
 191                 str     lr, [r0, #-4]!
 192
 193 #ifdef ARM_ALWAYS_BX
 194                 pop     {r10}                   /* Done with the branch-target scratch register.  */
 195                 cfi_adjust_cfa_offset (-4)
 196                 cfi_restore (r10)
 197 #endif
 198
 199         CALGN(  bcs     2b                      )
 200
 201 7:              pop     {r5 - r8}               /* Undo the push made at label 1.  */
 202                 cfi_adjust_cfa_offset (-16)
 203                 cfi_restore (r5)
 204                 cfi_restore (r6)
 205                 cfi_restore (r7)
 206                 cfi_restore (r8)
 207                 /* Tail: the low two bits of r2 give the 0-3 bytes still to copy.  */
 208 8:              movs    r2, r2, lsl #31         /* Bit 0 of r2 -> result (NE if set), bit 1 -> carry.  */
 209                 ldrbne  r3, [r1, #-1]!          /* One byte if bit 0 was set.  */
 210                 ldrbcs  r4, [r1, #-1]!          /* Two more bytes if bit 1 was set.  */
 211                 ldrbcs  ip, [r1, #-1]
 212                 strbne  r3, [r0, #-1]!
 213                 strbcs  r4, [r0, #-1]!
 214                 strbcs  ip, [r0, #-1]
 215
 216 #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
 217      || defined (ARM_ALWAYS_BX))
 218                 pop     {r0, r4, lr}            /* Reload the dest value saved at entry into r0; restore r4 and lr.  */
 219                 cfi_adjust_cfa_offset (-12)
 220                 cfi_restore (r4)
 221                 cfi_restore (lr)
 222                 bx      lr
 223 #else
 224                 pop     {r0, r4, pc}            /* Same restore, returning by loading the saved lr into pc.  */
 225 #endif
 226
 227                 cfi_restore_state
 228                 /* Destination end is misaligned by ip (1..3) bytes: copy that many single bytes.  */
 229 9:              cmp     ip, #2
 230                 ldrbgt  r3, [r1, #-1]!          /* Only when ip == 3.  */
 231                 ldrbge  r4, [r1, #-1]!          /* When ip >= 2.  */
 232                 ldrb    lr, [r1, #-1]!          /* Always.  */
 233                 strbgt  r3, [r0, #-1]!
 234                 strbge  r4, [r0, #-1]!
 235                 subs    r2, r2, ip              /* Account for the ip bytes copied here.  */
 236                 strb    lr, [r0, #-1]!
 237                 blo     8b                      /* Fewer than 4 bytes left: finish in the tail.  */
 238                 ands    ip, r1, #3              /* Destination is aligned now; re-check the source.  */
 239                 beq     1b                      /* Source aligned as well: plain word copy.  */
 240                 /* Source is misaligned by ip (1..3) bytes relative to the word-aligned destination.  */
 241 10:             bic     r1, r1, #3              /* Round the source pointer down to a word boundary.  */
 242                 cmp     ip, #2                  /* ip selects which shift pair is used below.  */
 243                 ldr     r3, [r1, #0]            /* Prime r3 with the word holding the highest bytes not yet copied.  */
 244                 beq     17f                     /* ip == 2.  */
 245                 blt     18f                     /* ip == 1; ip == 3 falls through to the first expansion.  */
 246
 247
 248                 .macro  backward_copy_shift push pull   /* push + pull is 32 in each of the three expansions below.  */
 249                 /* Expects r1 word aligned and r3 holding the partly consumed source word.  */
 250                 subs    r2, r2, #28             /* Same -32 bias on r2 as at label 1.  */
 251                 blo     14f                     /* Fewer than 32 bytes: use the word-at-a-time loop.  */
 252
 253         CALGN(  ands    ip, r1, #31             )
 254         CALGN(  rsb     ip, ip, #32             )
 255         CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
 256         CALGN(  subcc   r2, r2, ip              )
 257         CALGN(  bcc     15f                     )
 258
 259 11:             push    {r5 - r8, r10}          /* Extra registers for the 8-word shifting loop.  */
 260                 cfi_adjust_cfa_offset (20)
 261                 cfi_rel_offset (r5, 0)
 262                 cfi_rel_offset (r6, 4)
 263                 cfi_rel_offset (r7, 8)
 264                 cfi_rel_offset (r8, 12)
 265                 cfi_rel_offset (r10, 16)
 266
 267         PLD(    pld     [r1, #-4]               )
 268         PLD(    cmp     r2, #96                 )
 269         PLD(    pld     [r1, #-32]              )
 270         PLD(    blo     13f                     )
 271         PLD(    pld     [r1, #-64]              )
 272         PLD(    pld     [r1, #-96]              )
 273                 /* Each stored word is (higher word PUSH push) | (next lower word PULL pull).  */
 274 12:     PLD(    pld     [r1, #-128]             )
 275 13:             ldmdb   r1!, {r7, r8, r10, ip}  /* Upper four source words of this 32-byte block.  */
 276                 mov     lr, r3, PUSH #\push     /* Top output word starts from the carried-over r3.  */
 277                 subs    r2, r2, #32             /* Flags from here decide the loop branch below.  */
 278                 ldmdb   r1!, {r3, r4, r5, r6}   /* Lower four words; r3 carries over into the next round.  */
 279                 orr     lr, lr, ip, PULL #\pull
 280                 mov     ip, ip, PUSH #\push
 281                 orr     ip, ip, r10, PULL #\pull
 282                 mov     r10, r10, PUSH #\push
 283                 orr     r10, r10, r8, PULL #\pull
 284                 mov     r8, r8, PUSH #\push
 285                 orr     r8, r8, r7, PULL #\pull
 286                 mov     r7, r7, PUSH #\push
 287                 orr     r7, r7, r6, PULL #\pull
 288                 mov     r6, r6, PUSH #\push
 289                 orr     r6, r6, r5, PULL #\pull
 290                 mov     r5, r5, PUSH #\push
 291                 orr     r5, r5, r4, PULL #\pull
 292                 mov     r4, r4, PUSH #\push
 293                 orr     r4, r4, r3, PULL #\pull
 294                 stmdb   r0!, {r4 - r8, r10, ip, lr}     /* Store the eight merged words below r0.  */
 295                 bhs     12b                     /* Loop while at least another 32 bytes remain.  */
 296
 297                 pop     {r5 - r8, r10}
 298                 cfi_adjust_cfa_offset (-20)
 299                 cfi_restore (r5)
 300                 cfi_restore (r6)
 301                 cfi_restore (r7)
 302                 cfi_restore (r8)
 303                 cfi_restore (r10)
 304
 305 14:             ands    ip, r2, #28             /* ip = bytes left in whole words (0..28).  */
 306                 beq     16f                     /* None: go straight to the pointer fix-up.  */
 307
 308 15:             mov     lr, r3, PUSH #\push     /* One merged word per iteration.  */
 309                 ldr     r3, [r1, #-4]!          /* Next lower source word.  */
 310                 subs    ip, ip, #4
 311                 orr     lr, lr, r3, PULL #\pull
 312                 str     lr, [r0, #-4]!
 313                 bgt     15b                     /* Until the ip bytes are done.  */
 314         CALGN(  cmp     r2, #0                  )
 315         CALGN(  bge     11b                     )
 316
 317 16:             add     r1, r1, #(\pull / 8)    /* Move r1 from the word boundary back to the unaligned source position.  */
 318                 b       8b                      /* Finish with the byte tail.  */
 319
 320                 .endm
 321
 322
 323                 backward_copy_shift     push=8  pull=24         /* Fall-through case from label 10: ip == 3.  */
 324
 325 17:             backward_copy_shift     push=16 pull=16         /* ip == 2.  */
 326
 327 18:             backward_copy_shift     push=24 pull=8          /* ip == 1.  */
 328
 329
 330 END(memmove)
 331 libc_hidden_builtin_def (memmove)