arch/x86/lib/memmove_64.S

   1 /*
   2  * Normally compiler builtins are used, but sometimes the compiler calls out
   3  * of line code. Based on asm-i386/string.h.
   4  *
   5  * This assembly file is re-written from memmove_64.c file.
   6  *      - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
   7  */
   8 #define _STRING_C
   9 #include <linux/linkage.h>
  10 #include <asm/dwarf2.h>
  11 #include <asm/cpufeature.h>
  12 #include <asm/alternative-asm.h>
  13
  14 #undef memmove
  15
  16 /*
  17  * Implement memmove(). This can handle overlap between src and dst.
  18  * Modifies rdi, rsi, rdx, rcx, r8-r11 and the flags on the way.
  19  * Input:
  20  * rdi: dest
  21  * rsi: src
  22  * rdx: count (in bytes)
  23  *
  24  * Output:
  25  * rax: dest (copied from rdi at entry and not written again)
  26  */
  27 .weak memmove
  28
  29 ENTRY(memmove)
  30 ENTRY(__memmove)
  31         CFI_STARTPROC
  32
  33         /* Counts below 0x20 bytes skip the loops and use the tail code at 1: */
  34         mov %rdi, %rax                  /* return value: rax = dest */
  35         cmp $0x20, %rdx
  36         jb      1f
  37
  38         /* Decide forward/backward copy mode (the compares are signed) */
  39         cmp %rdi, %rsi
  40         jge .Lmemmove_begin_forward     /* src >= dest: copy forward */
  41         mov %rsi, %r8
  42         add %rdx, %r8                   /* r8 = src + count */
  43         cmp %rdi, %r8
  44         jg 2f                           /* src < dest < src + count: copy backward */
  45
  46 .Lmemmove_begin_forward:
  47         /*
  48          * rep movsq has a high startup latency, so copies below
  49          * 680 bytes are handled with general registers instead.
  50          */
  51         cmp  $680, %rdx
  52         jb      3f
  53         /*
  54          * movsq is only good for the aligned case (dest and src low bytes equal).
  55          */
  56
  57         cmpb %dil, %sil
  58         je 4f
  59 3:
  60         sub $0x20, %rdx                 /* bias count by -32 so the loop can test the borrow */
  61         /*
  62          * We gobble 32 bytes forward in each loop iteration.
  63          */
  64 5:
  65         sub $0x20, %rdx                 /* sets CF for the jae below; mov/lea keep the flags */
  66         movq 0*8(%rsi), %r11
  67         movq 1*8(%rsi), %r10
  68         movq 2*8(%rsi), %r9
  69         movq 3*8(%rsi), %r8
  70         leaq 4*8(%rsi), %rsi
  71
  72         movq %r11, 0*8(%rdi)
  73         movq %r10, 1*8(%rdi)
  74         movq %r9, 2*8(%rdi)
  75         movq %r8, 3*8(%rdi)
  76         leaq 4*8(%rdi), %rdi
  77         jae 5b                          /* loop while the sub at 5: did not borrow */
  78         addq $0x20, %rdx                /* undo the bias: rdx = bytes left (below 32) */
  79         jmp 1f
  80         /*
  81          * Handle data forward by movsq (count >= 680, equal low address bytes).
  82          */
  83         .p2align 4
  84 4:
  85         movq %rdx, %rcx
  86         movq -8(%rsi, %rdx), %r11       /* save the last 8 source bytes before copying */
  87         lea -8(%rdi, %rdx), %r10        /* r10 = address of the last 8 dest bytes */
  88         shrq $3, %rcx                   /* rcx = count / 8 quadwords */
  89         rep movsq
  90         movq %r11, (%r10)               /* store the saved tail; covers the count % 8 bytes */
  91         jmp 13f
  92 .Lmemmove_end_forward:
  93
  94         /*
  95          * Handle data backward by movsq (count >= 680, equal low address bytes).
  96          */
  97         .p2align 4
  98 7:
  99         movq %rdx, %rcx
 100         movq (%rsi), %r11               /* save the first 8 source bytes before copying */
 101         movq %rdi, %r10                 /* r10 = start of dest */
 102         leaq -8(%rsi, %rdx), %rsi       /* rsi = last quadword of src */
 103         leaq -8(%rdi, %rdx), %rdi       /* rdi = last quadword of dest */
 104         shrq $3, %rcx                   /* rcx = count / 8 quadwords */
 105         std                             /* DF = 1: movsq walks towards lower addresses */
 106         rep movsq
 107         cld                             /* clear DF again before continuing */
 108         movq %r11, (%r10)               /* store the saved head; covers the count % 8 bytes */
 109         jmp 13f
 110
 111         /*
 112          * Backward copy: pick between movsq (7:) and the register loop (8:).
 113          */
 114         .p2align 4
 115 2:
 116         cmp $680, %rdx
 117         jb 6f                           /* below 680 bytes: use the register loop */
 118         cmp %dil, %sil
 119         je 7b                           /* equal low bytes: use backward movsq */
 120 6:
 121         /*
 122          * Point rsi/rdi one past the end of src/dest and bias the count by -32.
 123          */
 124         addq %rdx, %rsi
 125         addq %rdx, %rdi
 126         subq $0x20, %rdx
 127         /*
 128          * We gobble 32 bytes backward in each loop iteration.
 129          */
 130 8:
 131         subq $0x20, %rdx                /* sets CF for the jae below; mov/lea keep the flags */
 132         movq -1*8(%rsi), %r11
 133         movq -2*8(%rsi), %r10
 134         movq -3*8(%rsi), %r9
 135         movq -4*8(%rsi), %r8
 136         leaq -4*8(%rsi), %rsi
 137
 138         movq %r11, -1*8(%rdi)
 139         movq %r10, -2*8(%rdi)
 140         movq %r9, -3*8(%rdi)
 141         movq %r8, -4*8(%rdi)
 142         leaq -4*8(%rdi), %rdi
 143         jae 8b                          /* loop while the subq at 8: did not borrow */
 144         /*
 145          * Undo the bias (rdx = bytes left) and move rsi/rdi back to the buffer heads.
 146          */
 147         addq $0x20, %rdx
 148         subq %rdx, %rsi
 149         subq %rdx, %rdi
 150 1:
 151         cmpq $16, %rdx
 152         jb 9f
 153         /*
 154          * Move data from 16 bytes to 31 bytes: all loads are done before any store.
 155          */
 156         movq 0*8(%rsi), %r11
 157         movq 1*8(%rsi), %r10
 158         movq -2*8(%rsi, %rdx), %r9
 159         movq -1*8(%rsi, %rdx), %r8
 160         movq %r11, 0*8(%rdi)
 161         movq %r10, 1*8(%rdi)
 162         movq %r9, -2*8(%rdi, %rdx)
 163         movq %r8, -1*8(%rdi, %rdx)
 164         jmp 13f
 165         .p2align 4
 166 9:
 167         cmpq $8, %rdx
 168         jb 10f
 169         /*
 170          * Move data from 8 bytes to 15 bytes: first and last quadword, loads first.
 171          */
 172         movq 0*8(%rsi), %r11
 173         movq -1*8(%rsi, %rdx), %r10
 174         movq %r11, 0*8(%rdi)
 175         movq %r10, -1*8(%rdi, %rdx)
 176         jmp 13f
 177 10:
 178         cmpq $4, %rdx
 179         jb 11f
 180         /*
 181          * Move data from 4 bytes to 7 bytes: first and last doubleword, loads first.
 182          */
 183         movl (%rsi), %r11d
 184         movl -4(%rsi, %rdx), %r10d
 185         movl %r11d, (%rdi)
 186         movl %r10d, -4(%rdi, %rdx)
 187         jmp 13f
 188 11:
 189         cmp $2, %rdx
 190         jb 12f
 191         /*
 192          * Move data from 2 bytes to 3 bytes: first and last word, loads first.
 193          */
 194         movw (%rsi), %r11w
 195         movw -2(%rsi, %rdx), %r10w
 196         movw %r11w, (%rdi)
 197         movw %r10w, -2(%rdi, %rdx)
 198         jmp 13f
 199 12:
 200         cmp $1, %rdx
 201         jb 13f                          /* nothing left to copy */
 202         /*
 203          * Move data for 1 byte.
 204          */
 205         movb (%rsi), %r11b
 206         movb %r11b, (%rdi)
 207 13:
 208         retq
 209         CFI_ENDPROC
 210
 211         .section .altinstr_replacement,"ax"
 212 .Lmemmove_begin_forward_efs:
 213         /* Forward moving data: byte-wise copy of rdx bytes with rep movsb. */
 214         movq %rdx, %rcx
 215         rep movsb
 216         retq
 217 .Lmemmove_end_forward_efs:
 218         .previous
 219
 220         .section .altinstructions,"a"   /* NOTE(review): presumably patches the forward path on ERMS CPUs - confirm */
 221         altinstruction_entry .Lmemmove_begin_forward,           \
 222                 .Lmemmove_begin_forward_efs,X86_FEATURE_ERMS,   \
 223                 .Lmemmove_end_forward-.Lmemmove_begin_forward,  \
 224                 .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
 225         .previous
 226 ENDPROC(__memmove)
 227 ENDPROC(memmove)