release/src-rt-6.x/linux/linux-2.6/arch/x86_64/lib/copy_user.S

   1 /* Copyright 2002 Andi Kleen, SuSE Labs.
   2  * Subject to the GNU Public License v2.
   3  *
   4  * Functions to copy from and to user space.
   5  */
   6
   7 #include <linux/linkage.h>
   8 #include <asm/dwarf2.h>
   9
  10 #define FIX_ALIGNMENT 1
  11
  12 #include <asm/current.h>
  13 #include <asm/asm-offsets.h>
  14 #include <asm/thread_info.h>
  15 #include <asm/cpufeature.h>
  16
  17         .macro ALTERNATIVE_JUMP feature,orig,alt
  18 0:
  19         .byte 0xe9      /* 32bit jump */
  20         .long \orig-1f  /* by default jump to orig */
  21 1:
  22         .section .altinstr_replacement,"ax"
  23 2:      .byte 0xe9                   /* near jump with 32bit immediate */
  24         .long \alt-1b /* offset */   /* or alternatively to alt */
  25         .previous
  26         .section .altinstructions,"a"
  27         .align 8
  28         .quad  0b
  29         .quad  2b
  30         .byte  \feature              /* when feature is set */
  31         .byte  5
  32         .byte  5
  33         .previous
  34         .endm
  35
  36 /* Standard copy_to_user with segment limit checking */
  37 ENTRY(copy_to_user)
  38         CFI_STARTPROC
  39         GET_THREAD_INFO(%rax)
  40         movq %rdi,%rcx
  41         addq %rdx,%rcx
  42         jc  bad_to_user
  43         cmpq threadinfo_addr_limit(%rax),%rcx
  44         jae bad_to_user
  45         xorl %eax,%eax  /* clear zero flag */
  46         ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
  47         CFI_ENDPROC
  48
  49 ENTRY(copy_user_generic)
  50         CFI_STARTPROC
  51         movl $1,%ecx    /* set zero flag */
  52         ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
  53         CFI_ENDPROC
  54
  55 ENTRY(__copy_from_user_inatomic)
  56         CFI_STARTPROC
  57         xorl %ecx,%ecx  /* clear zero flag */
  58         ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
  59         CFI_ENDPROC
  60
  61 /* Standard copy_from_user with segment limit checking */
  62 ENTRY(copy_from_user)
  63         CFI_STARTPROC
  64         GET_THREAD_INFO(%rax)
  65         movq %rsi,%rcx
  66         addq %rdx,%rcx
  67         jc  bad_from_user
  68         cmpq threadinfo_addr_limit(%rax),%rcx
  69         jae  bad_from_user
  70         movl $1,%ecx    /* set zero flag */
  71         ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
  72         CFI_ENDPROC
  73 ENDPROC(copy_from_user)
  74
  75         .section .fixup,"ax"
  76         /* must zero dest */
  77 bad_from_user:
  78         CFI_STARTPROC
  79         movl %edx,%ecx
  80         xorl %eax,%eax
  81         rep
  82         stosb
  83 bad_to_user:
  84         movl    %edx,%eax
  85         ret
  86         CFI_ENDPROC
  87 END(bad_from_user)
  88         .previous
  89
  90
  91 /*
  92  * copy_user_generic_unrolled - memory copy with exception handling.
  93  * This version is for CPUs like P4 that don't have efficient micro code for rep movsq
  94  *
  95  * Input:
  96  * rdi destination
  97  * rsi source
  98  * rdx count
  99  * ecx zero flag -- if true zero destination on error
 100  *
 101  * Output:
 102  * eax uncopied bytes or 0 if successful.
 103  */
 104 ENTRY(copy_user_generic_unrolled)
 105         CFI_STARTPROC
 106         pushq %rbx
 107         CFI_ADJUST_CFA_OFFSET 8
 108         CFI_REL_OFFSET rbx, 0
 109         pushq %rcx
 110         CFI_ADJUST_CFA_OFFSET 8
 111         CFI_REL_OFFSET rcx, 0
 112         xorl %eax,%eax          /*zero for the exception handler */
 113
 114 #ifdef FIX_ALIGNMENT
 115         /* check for bad alignment of destination */
 116         movl %edi,%ecx
 117         andl $7,%ecx
 118         jnz  .Lbad_alignment
 119 .Lafter_bad_alignment:
 120 #endif
 121
 122         movq %rdx,%rcx
 123
 124         movl $64,%ebx
 125         shrq $6,%rdx
 126         decq %rdx
 127         js   .Lhandle_tail
 128
 129         .p2align 4
 130 .Lloop:
 131 .Ls1:   movq (%rsi),%r11
 132 .Ls2:   movq 1*8(%rsi),%r8
 133 .Ls3:   movq 2*8(%rsi),%r9
 134 .Ls4:   movq 3*8(%rsi),%r10
 135 .Ld1:   movq %r11,(%rdi)
 136 .Ld2:   movq %r8,1*8(%rdi)
 137 .Ld3:   movq %r9,2*8(%rdi)
 138 .Ld4:   movq %r10,3*8(%rdi)
 139
 140 .Ls5:   movq 4*8(%rsi),%r11
 141 .Ls6:   movq 5*8(%rsi),%r8
 142 .Ls7:   movq 6*8(%rsi),%r9
 143 .Ls8:   movq 7*8(%rsi),%r10
 144 .Ld5:   movq %r11,4*8(%rdi)
 145 .Ld6:   movq %r8,5*8(%rdi)
 146 .Ld7:   movq %r9,6*8(%rdi)
 147 .Ld8:   movq %r10,7*8(%rdi)
 148
 149         decq %rdx
 150
 151         leaq 64(%rsi),%rsi
 152         leaq 64(%rdi),%rdi
 153
 154         jns  .Lloop
 155
 156         .p2align 4
 157 .Lhandle_tail:
 158         movl %ecx,%edx
 159         andl $63,%ecx
 160         shrl $3,%ecx
 161         jz   .Lhandle_7
 162         movl $8,%ebx
 163         .p2align 4
 164 .Lloop_8:
 165 .Ls9:   movq (%rsi),%r8
 166 .Ld9:   movq %r8,(%rdi)
 167         decl %ecx
 168         leaq 8(%rdi),%rdi
 169         leaq 8(%rsi),%rsi
 170         jnz .Lloop_8
 171
 172 .Lhandle_7:
 173         movl %edx,%ecx
 174         andl $7,%ecx
 175         jz   .Lende
 176         .p2align 4
 177 .Lloop_1:
 178 .Ls10:  movb (%rsi),%bl
 179 .Ld10:  movb %bl,(%rdi)
 180         incq %rdi
 181         incq %rsi
 182         decl %ecx
 183         jnz .Lloop_1
 184
 185         CFI_REMEMBER_STATE
 186 .Lende:
 187         popq %rcx
 188         CFI_ADJUST_CFA_OFFSET -8
 189         CFI_RESTORE rcx
 190         popq %rbx
 191         CFI_ADJUST_CFA_OFFSET -8
 192         CFI_RESTORE rbx
 193         ret
 194         CFI_RESTORE_STATE
 195
 196 #ifdef FIX_ALIGNMENT
 197         /* align destination */
 198         .p2align 4
 199 .Lbad_alignment:
 200         movl $8,%r9d
 201         subl %ecx,%r9d
 202         movl %r9d,%ecx
 203         cmpq %r9,%rdx
 204         jz   .Lhandle_7
 205         js   .Lhandle_7
 206 .Lalign_1:
 207 .Ls11:  movb (%rsi),%bl
 208 .Ld11:  movb %bl,(%rdi)
 209         incq %rsi
 210         incq %rdi
 211         decl %ecx
 212         jnz .Lalign_1
 213         subq %r9,%rdx
 214         jmp .Lafter_bad_alignment
 215 #endif
 216
 217         /* table sorted by exception address */
 218         .section __ex_table,"a"
 219         .align 8
 220         .quad .Ls1,.Ls1e
 221         .quad .Ls2,.Ls2e
 222         .quad .Ls3,.Ls3e
 223         .quad .Ls4,.Ls4e
 224         .quad .Ld1,.Ls1e
 225         .quad .Ld2,.Ls2e
 226         .quad .Ld3,.Ls3e
 227         .quad .Ld4,.Ls4e
 228         .quad .Ls5,.Ls5e
 229         .quad .Ls6,.Ls6e
 230         .quad .Ls7,.Ls7e
 231         .quad .Ls8,.Ls8e
 232         .quad .Ld5,.Ls5e
 233         .quad .Ld6,.Ls6e
 234         .quad .Ld7,.Ls7e
 235         .quad .Ld8,.Ls8e
 236         .quad .Ls9,.Le_quad
 237         .quad .Ld9,.Le_quad
 238         .quad .Ls10,.Le_byte
 239         .quad .Ld10,.Le_byte
 240 #ifdef FIX_ALIGNMENT
 241         .quad .Ls11,.Lzero_rest
 242         .quad .Ld11,.Lzero_rest
 243 #endif
 244         .quad .Le5,.Le_zero
 245         .previous
 246
 247         /* compute 64-offset for main loop. 8 bytes accuracy with error on the
 248            pessimistic side. this is gross. it would be better to fix the
 249            interface. */
 250         /* eax: zero, ebx: 64 */
 251 .Ls1e:  addl $8,%eax
 252 .Ls2e:  addl $8,%eax
 253 .Ls3e:  addl $8,%eax
 254 .Ls4e:  addl $8,%eax
 255 .Ls5e:  addl $8,%eax
 256 .Ls6e:  addl $8,%eax
 257 .Ls7e:  addl $8,%eax
 258 .Ls8e:  addl $8,%eax
 259         addq %rbx,%rdi  /* +64 */
 260         subq %rax,%rdi  /* correct destination with computed offset */
 261
 262         shlq $6,%rdx    /* loop counter * 64 (stride length) */
 263         addq %rax,%rdx  /* add offset to loopcnt */
 264         andl $63,%ecx   /* remaining bytes */
 265         addq %rcx,%rdx  /* add them */
 266         jmp .Lzero_rest
 267
 268         /* exception on quad word loop in tail handling */
 269         /* ecx: loopcnt/8, %edx: length, rdi: correct */
 270 .Le_quad:
 271         shll $3,%ecx
 272         andl $7,%edx
 273         addl %ecx,%edx
 274         /* edx: bytes to zero, rdi: dest, eax:zero */
 275 .Lzero_rest:
 276         cmpl $0,(%rsp)
 277         jz   .Le_zero
 278         movq %rdx,%rcx
 279 .Le_byte:
 280         xorl %eax,%eax
 281 .Le5:   rep
 282         stosb
 283         /* when there is another exception while zeroing the rest just return */
 284 .Le_zero:
 285         movq %rdx,%rax
 286         jmp .Lende
 287         CFI_ENDPROC
 288 ENDPROC(copy_user_generic)
 289
 290
 291         /* Some CPUs run faster using the string copy instructions.
 292            This is also a lot simpler. Use them when possible.
 293            Patch in jmps to this code instead of copying it fully
 294            to avoid unwanted aliasing in the exception tables. */
 295
 296  /* rdi destination
 297   * rsi source
 298   * rdx count
 299   * ecx zero flag
 300   *
 301   * Output:
 302   * eax uncopied bytes or 0 if successfull.
 303   *
 304   * Only 4GB of copy is supported. This shouldn't be a problem
 305   * because the kernel normally only writes from/to page sized chunks
 306   * even if user space passed a longer buffer.
 307   * And more would be dangerous because both Intel and AMD have
 308   * errata with rep movsq > 4GB. If someone feels the need to fix
 309   * this please consider this.
 310   */
 311 ENTRY(copy_user_generic_string)
 312         CFI_STARTPROC
 313         movl %ecx,%r8d          /* save zero flag */
 314         movl %edx,%ecx
 315         shrl $3,%ecx
 316         andl $7,%edx
 317         jz   10f
 318 1:      rep
 319         movsq
 320         movl %edx,%ecx
 321 2:      rep
 322         movsb
 323 9:      movl %ecx,%eax
 324         ret
 325
 326         /* multiple of 8 byte */
 327 10:     rep
 328         movsq
 329         xor %eax,%eax
 330         ret
 331
 332         /* exception handling */
 333 3:      lea (%rdx,%rcx,8),%rax  /* exception on quad loop */
 334         jmp 6f
 335 5:      movl %ecx,%eax          /* exception on byte loop */
 336         /* eax: left over bytes */
 337 6:      testl %r8d,%r8d         /* zero flag set? */
 338         jz 7f
 339         movl %eax,%ecx          /* initialize x86 loop counter */
 340         push %rax
 341         xorl %eax,%eax
 342 8:      rep
 343         stosb                   /* zero the rest */
 344 11:     pop %rax
 345 7:      ret
 346         CFI_ENDPROC
 347 END(copy_user_generic_c)
 348
 349         .section __ex_table,"a"
 350         .quad 1b,3b
 351         .quad 2b,5b
 352         .quad 8b,11b
 353         .quad 10b,3b
 354         .previous