ports/sysdeps/arm/memmove.S

   1 /* Copyright (C) 2006-2013 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library.  If not, see
  18    <http://www.gnu.org/licenses/>.  */
  19
  20 #include <sysdep.h>
  21
  22 /*
  23  * Data preload for architectures that support it (ARM V5TE and above)
  24  */
  25 #if (defined (__ARM_ARCH_2__) || defined (__ARM_ARCH_3__) \
  26      || defined (__ARM_ARCH_3M__) || defined (__ARM_ARCH_4__) \
  27      || defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_5__) \
  28      || defined (__ARM_ARCH_5T__))
  29 #define PLD(code...)            /* one of the listed architectures: drop the wrapped code */
  30 #else
  31 #define PLD(code...)    code    /* any other architecture: emit the wrapped code unchanged */
  32 #endif
  33
  34 /*
  35  * This can be used to enable code to cacheline align the source pointer.
  36  * Experiments on tested architectures (StrongARM and XScale) didn't show
  37  * this to be a worthwhile thing to do.  That might be different in the future.
  38  */
  39 //#define CALGN(code...)        code
  40 #define CALGN(code...)          /* expands to nothing, so every CALGN(...) line below is compiled out */
  41
  42 /*
  43  * Endian independent macros for shifting bytes within registers.
  44  */
  45 #ifdef __ARMEB__                /* __ARMEB__ defined: pull shifts left, push shifts right */
  46 #define pull            lsl
  47 #define push            lsr
  48 #else                           /* __ARMEB__ not defined: the two directions are swapped */
  49 #define pull            lsr
  50 #define push            lsl
  51 #endif
  52
  53                 .text
  54
  55 /*
  56  * Prototype: void *memmove(void *dest, const void *src, size_t n);
  57  *
  58  * Note:
  59  *
  60  * If dest is at or below src, or the regions don't overlap, we simply branch
  61  * to memcpy, which is normally a bit faster. Otherwise we copy downwards.
  62  */
  63
  64 ENTRY(memmove)
  65                 @ In: r0 = dest, r1 = src, r2 = n (see the prototype above).
  66                 subs    ip, r0, r1              @ ip = dest - src
  67                 cmphi   r2, ip                  @ only if dest > src: compare n with that gap
  68 #ifdef NOT_IN_libc
  69                 bls     memcpy                  @ dest <= src or n <= gap: plain branch, nothing saved yet
  70 #else
  71                 bls     HIDDEN_JUMPTARGET(memcpy)
  72 #endif
  73                 @ From here on the copy runs backwards, starting at the end of both buffers.
  74                 stmfd   sp!, {r0, r4, lr}       @ keep dest (reloaded into r0 on return), r4 and lr
  75                 cfi_adjust_cfa_offset (12)
  76                 cfi_rel_offset (r4, 4)
  77                 cfi_rel_offset (lr, 8)
  78
  79                 cfi_remember_state
  80
  81                 add     r1, r1, r2              @ r1 = src + n
  82                 add     r0, r0, r2              @ r0 = dest + n
  83                 subs    r2, r2, #4              @ r2 = n - 4
  84                 blt     8f                      @ fewer than 4 bytes: byte tail only
  85                 ands    ip, r0, #3              @ ip = (dest + n) & 3
  86         PLD(    pld     [r1, #-4]               )
  87                 bne     9f                      @ dest end is not word aligned
  88                 ands    ip, r1, #3              @ ip = (src + n) & 3
  89                 bne     10f                     @ src end is not word aligned: shifted copy
  90                 @ At 1: both r0 and r1 are word aligned.
  91 1:              subs    r2, r2, #(28)           @ r2 = remaining - 32
  92                 stmfd   sp!, {r5 - r8}          @ save r5-r8; the flags from the subs are kept
  93                 cfi_adjust_cfa_offset (16)
  94                 cfi_rel_offset (r5, 0)
  95                 cfi_rel_offset (r6, 4)
  96                 cfi_rel_offset (r7, 8)
  97                 cfi_rel_offset (r8, 12)
  98                 blt     5f                      @ fewer than 32 bytes remain
  99
 100         CALGN(  ands    ip, r1, #31             )
 101         CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
 102         CALGN(  bcs     2f                      )
 103         CALGN(  adr     r4, 6f                  )
 104         CALGN(  subs    r2, r2, ip              )  @ C is set here
 105         CALGN(  add     pc, r4, ip              )
 106                 @ When preloading, r2 is biased by a further 96 (a multiple of 32) during the loop.
 107         PLD(    pld     [r1, #-4]               )
 108 2:      PLD(    subs    r2, r2, #96             )
 109         PLD(    pld     [r1, #-32]              )
 110         PLD(    blt     4f                      )
 111         PLD(    pld     [r1, #-64]              )
 112         PLD(    pld     [r1, #-96]              )
 113
 114 3:      PLD(    pld     [r1, #-128]             )
 115 4:              ldmdb   r1!, {r3, r4, r5, r6, r7, r8, ip, lr}  @ load the 32 bytes below r1
 116                 subs    r2, r2, #32
 117                 stmdb   r0!, {r3, r4, r5, r6, r7, r8, ip, lr}  @ store them below r0
 118                 bge     3b                      @ at least 32 more bytes to go
 119         PLD(    cmn     r2, #96                 )
 120         PLD(    bge     4b                      )
 121
 122 5:              ands    ip, r2, #28             @ ip = bytes left in whole words (0..28)
 123                 rsb     ip, ip, #32             @ ip = 32 - ip; flags are still those of the ands
 124                 addne   pc, pc, ip              @ C is always clear here
 125                 b       7f                      @ no whole word left
 126 6:              nop                             @ base of the addne above (pc reads as its address + 8)
 127                 ldr     r3, [r1, #-4]!          @ entry point is 6: + ip, so only the last loads run
 128                 ldr     r4, [r1, #-4]!
 129                 ldr     r5, [r1, #-4]!
 130                 ldr     r6, [r1, #-4]!
 131                 ldr     r7, [r1, #-4]!
 132                 ldr     r8, [r1, #-4]!
 133                 ldr     lr, [r1, #-4]!
 134
 135                 add     pc, pc, ip              @ same skip for the stores; base is the second nop
 136                 nop
 137                 nop
 138                 str     r3, [r0, #-4]!
 139                 str     r4, [r0, #-4]!
 140                 str     r5, [r0, #-4]!
 141                 str     r6, [r0, #-4]!
 142                 str     r7, [r0, #-4]!
 143                 str     r8, [r0, #-4]!
 144                 str     lr, [r0, #-4]!
 145
 146         CALGN(  bcs     2b                      )
 147
 148 7:              ldmfd   sp!, {r5 - r8}
 149                 cfi_adjust_cfa_offset (-16)
 150                 cfi_restore (r5)
 151                 cfi_restore (r6)
 152                 cfi_restore (r7)
 153                 cfi_restore (r8)
 154                 @ Byte tail: every earlier adjustment of r2 kept its low two bits = bytes still to copy.
 155 8:              movs    r2, r2, lsl #31         @ ne: bit 0 of r2 was set; cs: bit 1 was set
 156                 ldrneb  r3, [r1, #-1]!          @ bit 0: one byte
 157                 ldrcsb  r4, [r1, #-1]!          @ bit 1: two more bytes
 158                 ldrcsb  ip, [r1, #-1]
 159                 strneb  r3, [r0, #-1]!
 160                 strcsb  r4, [r0, #-1]!
 161                 strcsb  ip, [r0, #-1]
 162
 163 #if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
 164                 ldmfd   sp!, {r0, r4, lr}       @ r0 = dest as saved on entry
 165                 cfi_adjust_cfa_offset (-12)
 166                 cfi_restore (r4)
 167                 cfi_restore (lr)
 168                 bx      lr
 169 #else
 170                 ldmfd   sp!, {r0, r4, pc}       @ r0 = dest as saved on entry; return
 171 #endif
 172
 173                 cfi_restore_state
 174                 @ 9: copy ip (1..3) bytes so that r0 becomes word aligned.
 175 9:              cmp     ip, #2
 176                 ldrgtb  r3, [r1, #-1]!          @ only when ip == 3
 177                 ldrgeb  r4, [r1, #-1]!          @ when ip >= 2
 178                 ldrb    lr, [r1, #-1]!          @ always
 179                 strgtb  r3, [r0, #-1]!
 180                 strgeb  r4, [r0, #-1]!
 181                 subs    r2, r2, ip              @ account for the bytes just copied
 182                 strb    lr, [r0, #-1]!
 183                 blt     8b                      @ fewer than 4 bytes left
 184                 ands    ip, r1, #3              @ ip = r1 & 3 after the byte copies
 185                 beq     1b                      @ src is word aligned as well
 186                 @ 10: r0 is word aligned, r1 is not; ip = r1 & 3 (1..3).
 187 10:             bic     r1, r1, #3              @ round r1 down to a word boundary
 188                 cmp     ip, #2
 189                 ldr     r3, [r1, #0]            @ r3 = aligned word holding the next ip source bytes
 190                 beq     17f                     @ ip == 2
 191                 blt     18f                     @ ip == 1
 192                 @ ip == 3 falls through into the first expansion of the macro below.
 193
 194                 .macro  backward_copy_shift push pull
 195                 /* r3 carries the last loaded source word; each stored word merges it with the next lower one. */
 196                 subs    r2, r2, #28             @ r2 = remaining - 32
 197                 blt     14f                     @ fewer than 32 bytes remain
 198
 199         CALGN(  ands    ip, r1, #31             )
 200         CALGN(  rsb     ip, ip, #32             )
 201         CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
 202         CALGN(  subcc   r2, r2, ip              )
 203         CALGN(  bcc     15f                     )
 204
 205 11:             stmfd   sp!, {r5 - r9}
 206                 cfi_adjust_cfa_offset (20)
 207                 cfi_rel_offset (r5, 0)
 208                 cfi_rel_offset (r6, 4)
 209                 cfi_rel_offset (r7, 8)
 210                 cfi_rel_offset (r8, 12)
 211                 cfi_rel_offset (r9, 16)
 212
 213         PLD(    pld     [r1, #-4]               )
 214         PLD(    subs    r2, r2, #96             )
 215         PLD(    pld     [r1, #-32]              )
 216         PLD(    blt     13f                     )
 217         PLD(    pld     [r1, #-64]              )
 218         PLD(    pld     [r1, #-96]              )
 219
 220 12:     PLD(    pld     [r1, #-128]             )
 221 13:             ldmdb   r1!, {r7, r8, r9, ip}   @ four source words below r1
 222                 mov     lr, r3, push #\push
 223                 subs    r2, r2, #32
 224                 ldmdb   r1!, {r3, r4, r5, r6}   @ four more; r3 carries over to the next round
 225                 orr     lr, lr, ip, pull #\pull
 226                 mov     ip, ip, push #\push
 227                 orr     ip, ip, r9, pull #\pull
 228                 mov     r9, r9, push #\push
 229                 orr     r9, r9, r8, pull #\pull
 230                 mov     r8, r8, push #\push
 231                 orr     r8, r8, r7, pull #\pull
 232                 mov     r7, r7, push #\push
 233                 orr     r7, r7, r6, pull #\pull
 234                 mov     r6, r6, push #\push
 235                 orr     r6, r6, r5, pull #\pull
 236                 mov     r5, r5, push #\push
 237                 orr     r5, r5, r4, pull #\pull
 238                 mov     r4, r4, push #\push
 239                 orr     r4, r4, r3, pull #\pull
 240                 stmdb   r0!, {r4 - r9, ip, lr}  @ store the eight merged words below r0
 241                 bge     12b
 242         PLD(    cmn     r2, #96                 )
 243         PLD(    bge     13b                     )
 244
 245                 ldmfd   sp!, {r5 - r9}
 246                 cfi_adjust_cfa_offset (-20)
 247                 cfi_restore (r5)
 248                 cfi_restore (r6)
 249                 cfi_restore (r7)
 250                 cfi_restore (r8)
 251                 cfi_restore (r9)
 252
 253 14:             ands    ip, r2, #28             @ ip = bytes left in whole words (0..28)
 254                 beq     16f
 255                 @ 15: one merged word per iteration until ip reaches zero.
 256 15:             mov     lr, r3, push #\push
 257                 ldr     r3, [r1, #-4]!
 258                 subs    ip, ip, #4
 259                 orr     lr, lr, r3, pull #\pull
 260                 str     lr, [r0, #-4]!
 261                 bgt     15b
 262         CALGN(  cmp     r2, #0                  )
 263         CALGN(  bge     11b                     )
 264
 265 16:             add     r1, r1, #(\pull / 8)    @ undo the rounding done at 10: r1 is a byte position again
 266                 b       8b                      @ finish with the byte tail
 267
 268                 .endm
 269
 270                 @ Expansion for r1 & 3 == 3 (fall through from 10:).
 271                 backward_copy_shift     push=8  pull=24
 272                 @ Expansion for r1 & 3 == 2.
 273 17:             backward_copy_shift     push=16 pull=16
 274                 @ Expansion for r1 & 3 == 1.
 275 18:             backward_copy_shift     push=24 pull=8
 276
 277
 278 END(memmove)
 279 libc_hidden_builtin_def (memmove)