/* Copyright (C) 2006-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Thumb requires excessive IT insns here.  */
#define NO_THUMB
#include <sysdep.h>
#include <arm-features.h>

/*
 * Data preload for architectures that support it (ARM V5TE and above)
 */
#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
     && !defined (__ARM_ARCH_5T__))
#define PLD(code...)    code
#else
#define PLD(code...)
#endif
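
/* PLD(x) expands to its argument only when prefetch is available, so e.g.
   PLD( sfi_pld r1, #-4 ) emits a source prefetch on ARMv5TE and later and
   disappears entirely on older cores.  */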

/*
 * This can be used to enable code to cacheline align the source pointer.
 * Experiments on tested architectures (StrongARM and XScale) didn't show
 * this a worthwhile thing to do.  That might be different in the future.
 */
//#define CALGN(code...)        code
#define CALGN(code...)

/*
 * Endian independent macros for shifting bytes within registers.
 */
#ifndef __ARMEB__
#define PULL            lsr
#define PUSH            lsl
#else
#define PULL            lsl
#define PUSH            lsr
#endif
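
/* These are used pairwise in the misaligned-source loops below: e.g.
   "mov lr, r3, PUSH #\push" followed by "orr lr, lr, ip, PULL #\pull"
   splices one aligned destination word out of two adjacent source words,
   with the shift directions swapped on big-endian targets.  */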

                .text
                .syntax unified

/*
 * Prototype: void *memmove(void *dest, const void *src, size_t n);
 *
 * Note:
 *
 * If the memory regions don't overlap, we simply branch to memcpy which is
 * normally a bit faster.  Otherwise the copy is done going downwards.
 */
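
/* A rough C sketch of the dispatch below (illustrative only, not part of
   the build; "memmove_sketch" is just a hypothetical name): the backward
   copy is needed only when the destination starts inside the source region;
   every other case is safe to forward to memcpy.

     #include <string.h>

     void *memmove_sketch (void *dest, const void *src, size_t n)
     {
       char *d = dest;
       const char *s = src;
       if (d <= s || (size_t) (d - s) >= n)
         return memcpy (dest, src, n);  // no harmful overlap
       d += n;                          // copy downwards, from the end
       s += n;                          // toward the start
       while (n--)
         *--d = *--s;
       return dest;
     }
*/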

ENTRY(memmove)

                subs    ip, r0, r1
                cmphi   r2, ip
#if !IS_IN (libc)
                bls     memcpy
#else
                bls     HIDDEN_JUMPTARGET(memcpy)
#endif

                push    {r0, r4, lr}
                cfi_adjust_cfa_offset (12)
                cfi_rel_offset (r4, 4)
                cfi_rel_offset (lr, 8)

                cfi_remember_state
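
                /* Overlapping case: copy downwards.  Both pointers are first
                   advanced to one past the end of their buffers, and all loads
                   and stores below walk backwards from there.  */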
                add     r1, r1, r2
                add     r0, r0, r2
                subs    r2, r2, #4
                blt     8f
                ands    ip, r0, #3
        PLD(    sfi_pld r1, #-4                 )
                bne     9f
                ands    ip, r1, #3
                bne     10f

1:              subs    r2, r2, #(28)
                push    {r5 - r8}
                cfi_adjust_cfa_offset (16)
                cfi_rel_offset (r5, 0)
                cfi_rel_offset (r6, 4)
                cfi_rel_offset (r7, 8)
                cfi_rel_offset (r8, 12)
                blt     5f

        CALGN(  ands    ip, r1, #31             )
        CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
        CALGN(  bcs     2f                      )
        CALGN(  adr     r4, 6f                  )
        CALGN(  subs    r2, r2, ip              )  @ C is set here
#ifndef ARM_ALWAYS_BX
        CALGN(  add     pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
#else
        CALGN(  add     r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
        CALGN(  bx      r4                      )
#endif

        PLD(    sfi_pld r1, #-4                 )
2:      PLD(    subs    r2, r2, #96             )
        PLD(    sfi_pld r1, #-32                )
        PLD(    blt     4f                      )
        PLD(    sfi_pld r1, #-64                )
        PLD(    sfi_pld r1, #-96                )
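
                /* Main loop for the aligned case: 32 bytes per iteration,
                   loaded and stored as eight registers with descending
                   (ldmdb/stmdb) addressing.  */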
3:      PLD(    sfi_pld r1, #-128               )
4:              sfi_breg r1, \
                ldmdb   \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
                subs    r2, r2, #32
                sfi_breg r0, \
                stmdb   \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
                bge     3b
        PLD(    cmn     r2, #96                 )
        PLD(    bge     4b                      )

5:              ands    ip, r2, #28
                rsb     ip, ip, #32
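                /* 0 to 28 bytes of aligned data are left.  ip holds 32 minus
                   that count; scaled below, it indexes into the following
                   ldr/str slots so only the needed words are copied.  */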
#ifndef ARM_ALWAYS_BX
                /* C is always clear here.  */
                addne   pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
                b       7f
#else
                beq     7f
                push    {r10}
                cfi_adjust_cfa_offset (4)
                cfi_rel_offset (r10, 0)
0:              add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
                /* If alignment is not perfect, then there will be some
                   padding (nop) instructions between this BX and label 6.
                   The computation above assumed that two instructions
                   later is exactly the right spot.  */
                add     r10, #(6f - (0b + PC_OFS))
                bx      r10
#endif
                .p2align ARM_BX_ALIGN_LOG2
6:              nop
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r1, \
                ldr     r3, [\B, #-4]!
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r1, \
                ldr     r4, [\B, #-4]!
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r1, \
                ldr     r5, [\B, #-4]!
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r1, \
                ldr     r6, [\B, #-4]!
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r1, \
                ldr     r7, [\B, #-4]!
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r1, \
                ldr     r8, [\B, #-4]!
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r1, \
                ldr     lr, [\B, #-4]!

#ifndef ARM_ALWAYS_BX
                add     pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
                nop
#else
0:              add     r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
                /* If alignment is not perfect, then there will be some
                   padding (nop) instructions between this BX and label 66.
                   The computation above assumed that two instructions
                   later is exactly the right spot.  */
                add     r10, #(66f - (0b + PC_OFS))
                bx      r10
#endif
                .p2align ARM_BX_ALIGN_LOG2
66:             nop
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r0, \
                str     r3, [\B, #-4]!
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r0, \
                str     r4, [\B, #-4]!
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r0, \
                str     r5, [\B, #-4]!
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r0, \
                str     r6, [\B, #-4]!
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r0, \
                str     r7, [\B, #-4]!
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r0, \
                str     r8, [\B, #-4]!
                .p2align ARM_BX_ALIGN_LOG2
                sfi_breg r0, \
                str     lr, [\B, #-4]!

#ifdef ARM_ALWAYS_BX
                pop     {r10}
                cfi_adjust_cfa_offset (-4)
                cfi_restore (r10)
#endif

        CALGN(  bcs     2b                      )

7:              pop     {r5 - r8}
                cfi_adjust_cfa_offset (-16)
                cfi_restore (r5)
                cfi_restore (r6)
                cfi_restore (r7)
                cfi_restore (r8)
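
                /* Tail of at most 3 bytes.  The shift puts bit 0 of the
                   remaining count into N and bit 1 into C, so the "ne" byte
                   copy moves one byte and the "cs" pair moves two.  */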
8:              movs    r2, r2, lsl #31
                sfi_breg r1, \
                ldrbne  r3, [\B, #-1]!
                sfi_breg r1, \
                ldrbcs  r4, [\B, #-1]!
                sfi_breg r1, \
                ldrbcs  ip, [\B, #-1]
                sfi_breg r0, \
                strbne  r3, [\B, #-1]!
                sfi_breg r0, \
                strbcs  r4, [\B, #-1]!
                sfi_breg r0, \
                strbcs  ip, [\B, #-1]

#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
     || defined (ARM_ALWAYS_BX))
                pop     {r0, r4, lr}
                cfi_adjust_cfa_offset (-12)
                cfi_restore (r4)
                cfi_restore (lr)
                bx      lr
#else
                pop     {r0, r4, pc}
#endif

                cfi_restore_state
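
                /* Destination not word aligned: ip = r0 & 3, so copy that many
                   bytes (from the high end, since we copy downwards) to align
                   the destination, then retest the source alignment.  */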
9:              cmp     ip, #2
                sfi_breg r1, \
                ldrbgt  r3, [\B, #-1]!
                sfi_breg r1, \
                ldrbge  r4, [\B, #-1]!
                sfi_breg r1, \
                ldrb    lr, [\B, #-1]!
                sfi_breg r0, \
                strbgt  r3, [\B, #-1]!
                sfi_breg r0, \
                strbge  r4, [\B, #-1]!
                subs    r2, r2, ip
                sfi_breg r0, \
                strb    lr, [\B, #-1]!
                blt     8b
                ands    ip, r1, #3
                beq     1b
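
                /* Destination aligned but source is not: round the source
                   pointer down to a word boundary and pick the shifting copy
                   variant that matches its misalignment.  */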
10:             bic     r1, r1, #3
                cmp     ip, #2
                sfi_breg r1, \
                ldr     r3, [\B, #0]
                beq     17f
                blt     18f
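
/* Backward word copy from a misaligned source.  \push and \pull are the
   complementary shift counts (8/24, 16/16 or 24/8 bits) used to splice each
   destination word together from two neighbouring source words.  */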
                .macro  backward_copy_shift push pull

                subs    r2, r2, #28
                blt     14f

        CALGN(  ands    ip, r1, #31             )
        CALGN(  rsb     ip, ip, #32             )
        CALGN(  sbcsne  r4, ip, r2              )  @ C is always set here
        CALGN(  subcc   r2, r2, ip              )
        CALGN(  bcc     15f                     )

11:             push    {r5 - r8, r10}
                cfi_adjust_cfa_offset (20)
                cfi_rel_offset (r5, 0)
                cfi_rel_offset (r6, 4)
                cfi_rel_offset (r7, 8)
                cfi_rel_offset (r8, 12)
                cfi_rel_offset (r10, 16)

        PLD(    sfi_pld r1, #-4                 )
        PLD(    subs    r2, r2, #96             )
        PLD(    sfi_pld r1, #-32                )
        PLD(    blt     13f                     )
        PLD(    sfi_pld r1, #-64                )
        PLD(    sfi_pld r1, #-96                )
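
                /* Shifting main loop: 32 bytes per iteration, with r3 carrying
                   the partially consumed source word from one iteration into
                   the next.  */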
12:     PLD(    sfi_pld r1, #-128               )
13:             sfi_breg r1, \
                ldmdb   \B!, {r7, r8, r10, ip}
                mov     lr, r3, PUSH #\push
                subs    r2, r2, #32
                sfi_breg r1, \
                ldmdb   \B!, {r3, r4, r5, r6}
                orr     lr, lr, ip, PULL #\pull
                mov     ip, ip, PUSH #\push
                orr     ip, ip, r10, PULL #\pull
                mov     r10, r10, PUSH #\push
                orr     r10, r10, r8, PULL #\pull
                mov     r8, r8, PUSH #\push
                orr     r8, r8, r7, PULL #\pull
                mov     r7, r7, PUSH #\push
                orr     r7, r7, r6, PULL #\pull
                mov     r6, r6, PUSH #\push
                orr     r6, r6, r5, PULL #\pull
                mov     r5, r5, PUSH #\push
                orr     r5, r5, r4, PULL #\pull
                mov     r4, r4, PUSH #\push
                orr     r4, r4, r3, PULL #\pull
                sfi_breg r0, \
                stmdb   \B!, {r4 - r8, r10, ip, lr}
                bge     12b
        PLD(    cmn     r2, #96                 )
        PLD(    bge     13b                     )

                pop     {r5 - r8, r10}
                cfi_adjust_cfa_offset (-20)
                cfi_restore (r5)
                cfi_restore (r6)
                cfi_restore (r7)
                cfi_restore (r8)
                cfi_restore (r10)

14:             ands    ip, r2, #28
                beq     16f

15:             mov     lr, r3, PUSH #\push
                sfi_breg r1, \
                ldr     r3, [\B, #-4]!
                subs    ip, ip, #4
                orr     lr, lr, r3, PULL #\pull
                sfi_breg r0, \
                str     lr, [\B, #-4]!
                bgt     15b
        CALGN(  cmp     r2, #0                  )
        CALGN(  bge     11b                     )

16:             add     r1, r1, #(\pull / 8)
                b       8b

                .endm

                backward_copy_shift     push=8  pull=24

17:             backward_copy_shift     push=16 pull=16

18:             backward_copy_shift     push=24 pull=8

END(memmove)
libc_hidden_builtin_def (memmove)