From 8d379dad8f1670d233ac67b76b1c5a42ad3714a3 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Tue, 26 Sep 2006 10:52:32 +0200
Subject: [PATCH] [PATCH] annotate arch/x86_64/lib/*.S

Add unwind annotations to arch/x86_64/lib/*.S, and also use the macros
provided by linux/linkage.h wherever possible.

Some of the alternative instructions handling needed to be adjusted so
that the replacement code would also have valid unwind information.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/lib/clear_page.S | 47 +++++++++++++++-----------
 arch/x86_64/lib/copy_page.S  | 53 +++++++++++++++++++----------
 arch/x86_64/lib/copy_user.S  | 39 ++++++++++++++++------
 arch/x86_64/lib/csum-copy.S  | 26 ++++++++++++---
 arch/x86_64/lib/getuser.S    | 32 +++++++++++-------
 arch/x86_64/lib/iomap_copy.S | 10 ++++--
 arch/x86_64/lib/memcpy.S     | 69 +++++++++++++++++++++-----------------
 arch/x86_64/lib/memset.S     | 79 ++++++++++++++++++++++++--------------------
 arch/x86_64/lib/putuser.S    | 32 +++++++++++-------
 9 files changed, 244 insertions(+), 143 deletions(-)

diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
index 1f81b79b796..9a10a78bb4a 100644
--- a/arch/x86_64/lib/clear_page.S
+++ b/arch/x86_64/lib/clear_page.S
@@ -1,10 +1,22 @@
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
 /*
  * Zero a page.
  * rdi	page
  */
-	.globl clear_page
-	.p2align 4
-clear_page:
+	ALIGN
+clear_page_c:
+	CFI_STARTPROC
+	movl $4096/8,%ecx
+	xorl %eax,%eax
+	rep stosq
+	ret
+	CFI_ENDPROC
+ENDPROC(clear_page_c)
+
+ENTRY(clear_page)
+	CFI_STARTPROC
 	xorl   %eax,%eax
 	movl   $4096/64,%ecx
 	.p2align 4
@@ -23,28 +35,25 @@ clear_page:
 	jnz	.Lloop
 	nop
 	ret
-clear_page_end:
+	CFI_ENDPROC
+.Lclear_page_end:
+ENDPROC(clear_page)
 
 	/* Some CPUs run faster using the string instructions.
 	   It is also a lot simpler. Use this when possible */
 
 #include <asm/cpufeature.h>
 
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb					/* jmp */
+	.byte (clear_page_c - clear_page) - (2f - 1b)	/* offset */
+2:
+	.previous
 	.section .altinstructions,"a"
 	.align 8
-	.quad  clear_page
-	.quad  clear_page_c
-	.byte  X86_FEATURE_REP_GOOD
-	.byte  clear_page_end-clear_page
-	.byte  clear_page_c_end-clear_page_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
-clear_page_c:
-	movl $4096/8,%ecx
-	xorl %eax,%eax
-	rep
-	stosq
-	ret
-clear_page_c_end:
+	.quad clear_page
+	.quad 1b
+	.byte X86_FEATURE_REP_GOOD
+	.byte .Lclear_page_end - clear_page
+	.byte 2b - 1b
 	.previous
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
index 8fa19d96a7e..0ebb03b60e7 100644
--- a/arch/x86_64/lib/copy_page.S
+++ b/arch/x86_64/lib/copy_page.S
@@ -1,17 +1,33 @@
 /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
 
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+	ALIGN
+copy_page_c:
+	CFI_STARTPROC
+	movl $4096/8,%ecx
+	rep movsq
+	ret
+	CFI_ENDPROC
+ENDPROC(copy_page_c)
+
 /* Don't use streaming store because it's better when the
    target ends up in cache. */
 
 /* Could vary the prefetch distance based on SMP/UP */
 
-	.globl copy_page
-	.p2align 4
-copy_page:
+ENTRY(copy_page)
+	CFI_STARTPROC
 	subq	$3*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 3*8
 	movq	%rbx,(%rsp)
+	CFI_REL_OFFSET rbx, 0
 	movq	%r12,1*8(%rsp)
+	CFI_REL_OFFSET r12, 1*8
 	movq	%r13,2*8(%rsp)
+	CFI_REL_OFFSET r13, 2*8
 
 	movl	$(4096/64)-5,%ecx
 	.p2align 4
@@ -72,30 +88,33 @@ copy_page:
 	jnz	.Loop2
 
 	movq	(%rsp),%rbx
+	CFI_RESTORE rbx
 	movq	1*8(%rsp),%r12
+	CFI_RESTORE r12
 	movq	2*8(%rsp),%r13
+	CFI_RESTORE r13
 	addq	$3*8,%rsp
+	CFI_ADJUST_CFA_OFFSET -3*8
 	ret
+.Lcopy_page_end:
+	CFI_ENDPROC
+ENDPROC(copy_page)
 
 	/* Some CPUs run faster using the string copy instructions.
 	   It is also a lot simpler. Use this when possible */
 
 #include <asm/cpufeature.h>
 
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb					/* jmp */
+	.byte (copy_page_c - copy_page) - (2f - 1b)	/* offset */
+2:
+	.previous
 	.section .altinstructions,"a"
 	.align 8
-	.quad copy_page
-	.quad copy_page_c
-	.byte X86_FEATURE_REP_GOOD
-	.byte copy_page_c_end-copy_page_c
-	.byte copy_page_c_end-copy_page_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
-copy_page_c:
-	movl $4096/8,%ecx
-	rep
-	movsq
-	ret
-copy_page_c_end:
+	.quad copy_page
+	.quad 1b
+	.byte X86_FEATURE_REP_GOOD
+	.byte .Lcopy_page_end - copy_page
+	.byte 2b - 1b
 	.previous
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
index f64569b83b5..962f3a693c5 100644
--- a/arch/x86_64/lib/copy_user.S
+++ b/arch/x86_64/lib/copy_user.S
@@ -4,6 +4,9 @@
  * Functions to copy from and to user space.
  */
 
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
 #define FIX_ALIGNMENT 1
 
 #include <asm/current.h>
@@ -12,9 +15,8 @@
 #include <asm/cpufeature.h>
 
 /* Standard copy_to_user with segment limit checking */
-	.globl copy_to_user
-	.p2align 4
-copy_to_user:
+ENTRY(copy_to_user)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%rax)
 	movq %rdi,%rcx
 	addq %rdx,%rcx
@@ -25,9 +27,11 @@ copy_to_user:
 	.byte 0xe9	/* 32bit jump */
 	.long .Lcug-1f
 1:
+	CFI_ENDPROC
+ENDPROC(copy_to_user)
 
 	.section .altinstr_replacement,"ax"
-3:	.byte 0xe9			/* replacement jmp with 8 bit immediate */
+3:	.byte 0xe9			/* replacement jmp with 32 bit immediate */
 	.long copy_user_generic_c-1b	/* offset */
 	.previous
 	.section .altinstructions,"a"
@@ -40,9 +44,8 @@ copy_to_user:
 	.previous
 
 /* Standard copy_from_user with segment limit checking */
-	.globl copy_from_user
-	.p2align 4
-copy_from_user:
+ENTRY(copy_from_user)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%rax)
 	movq %rsi,%rcx
 	addq %rdx,%rcx
@@ -50,10 +53,13 @@ copy_from_user:
 	cmpq threadinfo_addr_limit(%rax),%rcx
 	jae bad_from_user
 	/* FALL THROUGH to copy_user_generic */
+	CFI_ENDPROC
+ENDPROC(copy_from_user)
 
 	.section .fixup,"ax"
 	/* must zero dest */
 bad_from_user:
+	CFI_STARTPROC
 	movl %edx,%ecx
 	xorl %eax,%eax
 	rep
@@ -61,6 +67,8 @@ bad_from_user:
 bad_to_user:
 	movl %edx,%eax
 	ret
+	CFI_ENDPROC
+END(bad_from_user)
 	.previous
 
@@ -75,9 +83,8 @@ bad_to_user:
  * Output:
  *  eax uncopied bytes or 0 if successful.
  */
-	.globl copy_user_generic
-	.p2align 4
-copy_user_generic:
+ENTRY(copy_user_generic)
+	CFI_STARTPROC
 	.byte 0x66,0x66,0x90	/* 5 byte nop for replacement jump */
 	.byte 0x66,0x90
 1:
@@ -95,6 +102,8 @@ copy_user_generic:
 	.previous
 .Lcug:
 	pushq %rbx
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET rbx, 0
 	xorl %eax,%eax	/*zero for the exception handler */
 
 #ifdef FIX_ALIGNMENT
@@ -168,9 +177,13 @@ copy_user_generic:
 	decl %ecx
 	jnz .Lloop_1
 
+	CFI_REMEMBER_STATE
 .Lende:
 	popq %rbx
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE rbx
 	ret
+	CFI_RESTORE_STATE
 
 #ifdef FIX_ALIGNMENT
 	/* align destination */
@@ -261,6 +274,9 @@ copy_user_generic:
 .Le_zero:
 	movq %rdx,%rax
 	jmp .Lende
+	CFI_ENDPROC
+ENDPROC(copy_user_generic)
+
 
 /* Some CPUs run faster using the string copy instructions.
    This is also a lot simpler. Use them when possible.
@@ -282,6 +298,7 @@ copy_user_generic:
  * this please consider this.
  */
 copy_user_generic_c:
+	CFI_STARTPROC
 	movl %edx,%ecx
 	shrl $3,%ecx
 	andl $7,%edx
@@ -294,6 +311,8 @@ copy_user_generic_c:
 	ret
 3:	lea (%rdx,%rcx,8),%rax
 	ret
+	CFI_ENDPROC
+END(copy_user_generic_c)
 
 	.section __ex_table,"a"
 	.quad 1b,3b
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
index 72fd55ee896..f0dba36578e 100644
--- a/arch/x86_64/lib/csum-copy.S
+++ b/arch/x86_64/lib/csum-copy.S
@@ -5,8 +5,9 @@
  * License.  See the file COPYING in the main directory of this archive
  * for more details. No warranty for anything given at all.
  */
-	#include <linux/config.h>
-	#include <asm/errno.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/errno.h>
 
 /*
  * Checksum copy with exception handling.
@@ -53,19 +54,24 @@
 	.endm
 
 
-	.globl csum_partial_copy_generic
-	.p2align 4
-csum_partial_copy_generic:
+ENTRY(csum_partial_copy_generic)
+	CFI_STARTPROC
 	cmpl	 $3*64,%edx
 	jle	 .Lignore
 
 .Lignore:
 	subq  $7*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 7*8
 	movq  %rbx,2*8(%rsp)
+	CFI_REL_OFFSET rbx, 2*8
 	movq  %r12,3*8(%rsp)
+	CFI_REL_OFFSET r12, 3*8
 	movq  %r14,4*8(%rsp)
+	CFI_REL_OFFSET r14, 4*8
 	movq  %r13,5*8(%rsp)
+	CFI_REL_OFFSET r13, 5*8
 	movq  %rbp,6*8(%rsp)
+	CFI_REL_OFFSET rbp, 6*8
 
 	movq  %r8,(%rsp)
 	movq  %r9,1*8(%rsp)
@@ -208,14 +214,22 @@ csum_partial_copy_generic:
 	addl %ebx,%eax
 	adcl %r9d,%eax		/* carry */
 
+	CFI_REMEMBER_STATE
.Lende:
 	movq 2*8(%rsp),%rbx
+	CFI_RESTORE rbx
 	movq 3*8(%rsp),%r12
+	CFI_RESTORE r12
 	movq 4*8(%rsp),%r14
+	CFI_RESTORE r14
 	movq 5*8(%rsp),%r13
+	CFI_RESTORE r13
 	movq 6*8(%rsp),%rbp
+	CFI_RESTORE rbp
 	addq $7*8,%rsp
+	CFI_ADJUST_CFA_OFFSET -7*8
 	ret
+	CFI_RESTORE_STATE
 
 	/* Exception handlers. Very simple, zeroing is done in the wrappers */
 .Lbad_source:
@@ -231,3 +245,5 @@ csum_partial_copy_generic:
 	jz   .Lende
 	movl $-EFAULT,(%rax)
 	jmp  .Lende
+	CFI_ENDPROC
+ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S
index 3844d5e885a..5448876261f 100644
--- a/arch/x86_64/lib/getuser.S
+++ b/arch/x86_64/lib/getuser.S
@@ -27,25 +27,26 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/dwarf2.h>
 #include <asm/page.h>
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 
 	.text
-	.p2align 4
-.globl __get_user_1
-__get_user_1:
+ENTRY(__get_user_1)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	cmpq threadinfo_addr_limit(%r8),%rcx
 	jae bad_get_user
1:	movzb (%rcx),%edx
 	xorl %eax,%eax
 	ret
+	CFI_ENDPROC
+ENDPROC(__get_user_1)
 
-	.p2align 4
-.globl __get_user_2
-__get_user_2:
+ENTRY(__get_user_2)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $1,%rcx
 	jc 20f
@@ -57,10 +58,11 @@ __get_user_2:
 	ret
20:	decq %rcx
 	jmp bad_get_user
+	CFI_ENDPROC
+ENDPROC(__get_user_2)
 
-	.p2align 4
-.globl __get_user_4
-__get_user_4:
+ENTRY(__get_user_4)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $3,%rcx
 	jc 30f
@@ -72,10 +74,11 @@ __get_user_4:
 	ret
30:	subq $3,%rcx
 	jmp bad_get_user
+	CFI_ENDPROC
+ENDPROC(__get_user_4)
 
-	.p2align 4
-.globl __get_user_8
-__get_user_8:
+ENTRY(__get_user_8)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $7,%rcx
 	jc 40f
@@ -87,11 +90,16 @@ __get_user_8:
 	ret
40:	subq $7,%rcx
 	jmp bad_get_user
+	CFI_ENDPROC
+ENDPROC(__get_user_8)
 
 bad_get_user:
+	CFI_STARTPROC
 	xorl %edx,%edx
 	movq $(-EFAULT),%rax
 	ret
+	CFI_ENDPROC
+END(bad_get_user)
 
 .section __ex_table,"a"
 	.quad 1b,bad_get_user
diff --git a/arch/x86_64/lib/iomap_copy.S b/arch/x86_64/lib/iomap_copy.S
index 8bbade5fea0..05a95e713da 100644
--- a/arch/x86_64/lib/iomap_copy.S
+++ b/arch/x86_64/lib/iomap_copy.S
@@ -15,12 +15,16 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
 /*
  * override generic version in lib/iomap_copy.c
  */
-	.globl __iowrite32_copy
-	.p2align 4
-__iowrite32_copy:
+ENTRY(__iowrite32_copy)
+	CFI_STARTPROC
 	movl %edx,%ecx
 	rep movsd
 	ret
+	CFI_ENDPROC
+ENDPROC(__iowrite32_copy)
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index 5554948b555..967b22fa7d0 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -1,6 +1,10 @@
 /* Copyright 2002 Andi Kleen */
 
-	#include <linux/config.h>
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+
 /*
  * memcpy - Copy a memory block.
  *
@@ -13,12 +17,26 @@
  * rax original destination
  */
 
-	.globl __memcpy
-	.globl memcpy
-	.p2align 4
-__memcpy:
-memcpy:
+	ALIGN
+memcpy_c:
+	CFI_STARTPROC
+	movq %rdi,%rax
+	movl %edx,%ecx
+	shrl $3,%ecx
+	andl $7,%edx
+	rep movsq
+	movl %edx,%ecx
+	rep movsb
+	ret
+	CFI_ENDPROC
+ENDPROC(memcpy_c)
+
+ENTRY(__memcpy)
+ENTRY(memcpy)
+	CFI_STARTPROC
 	pushq %rbx
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET rbx, 0
 	movq %rdi,%rax
 
 	movl %edx,%ecx
@@ -86,36 +104,27 @@ memcpy:
 
 .Lende:
 	popq %rbx
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE rbx
 	ret
 .Lfinal:
+	CFI_ENDPROC
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
 
 	/* Some CPUs run faster using the string copy instructions.
 	   It is also a lot simpler. Use this when possible */
 
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb				/* jmp */
+	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
+2:
+	.previous
 	.section .altinstructions,"a"
 	.align 8
-	.quad memcpy
-	.quad memcpy_c
-	.byte X86_FEATURE_REP_GOOD
-	.byte .Lfinal-memcpy
-	.byte memcpy_c_end-memcpy_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
- /* rdi	destination
- * rsi source
- * rdx count
- */
-memcpy_c:
-	movq %rdi,%rax
-	movl %edx,%ecx
-	shrl $3,%ecx
-	andl $7,%edx
-	rep
-	movsq
-	movl %edx,%ecx
-	rep
-	movsb
-	ret
-memcpy_c_end:
+	.quad memcpy
+	.quad 1b
+	.byte X86_FEATURE_REP_GOOD
+	.byte .Lfinal - memcpy
+	.byte 2b - 1b
 	.previous
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index ad397f2c7de..09ed1f6b0ea 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -1,4 +1,9 @@
 /* Copyright 2002 Andi Kleen, SuSE Labs */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
 /*
  * ISO C memset - set a memory block to a byte value.
  *
@@ -8,11 +13,29 @@
  *
  * rax	original destination
  */
-	.globl __memset
-	.globl memset
-	.p2align 4
-memset:
-__memset:
+	ALIGN
+memset_c:
+	CFI_STARTPROC
+	movq %rdi,%r9
+	movl %edx,%r8d
+	andl $7,%r8d
+	movl %edx,%ecx
+	shrl $3,%ecx
+	/* expand byte value */
+	movzbl %sil,%esi
+	movabs $0x0101010101010101,%rax
+	mulq %rsi		/* with rax, clobbers rdx */
+	rep stosq
+	movl %r8d,%ecx
+	rep stosb
+	movq %r9,%rax
+	ret
+	CFI_ENDPROC
+ENDPROC(memset_c)
+
+ENTRY(memset)
+ENTRY(__memset)
+	CFI_STARTPROC
 	movq %rdi,%r10
 	movq %rdx,%r11
 
@@ -25,6 +48,7 @@ __memset:
 	movl  %edi,%r9d
 	andl  $7,%r9d
 	jnz  .Lbad_alignment
+	CFI_REMEMBER_STATE
.Lafter_bad_alignment:
 
 	movl %r11d,%ecx
@@ -75,6 +99,7 @@ __memset:
 
 	movq	%r10,%rax
 	ret
+	CFI_RESTORE_STATE
.Lbad_alignment:
 	cmpq $7,%r11
 	jbe	.Lhandle_7
@@ -84,42 +109,26 @@ __memset:
 	addq %r8,%rdi
 	subq %r8,%r11
 	jmp .Lafter_bad_alignment
+.Lfinal:
+	CFI_ENDPROC
+ENDPROC(memset)
+ENDPROC(__memset)
 
 	/* Some CPUs run faster using the string instructions.
 	   It is also a lot simpler. Use this when possible */
 
 #include <asm/cpufeature.h>
 
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb				/* jmp */
+	.byte (memset_c - memset) - (2f - 1b)	/* offset */
+2:
+	.previous
 	.section .altinstructions,"a"
 	.align 8
-	.quad memset
-	.quad memset_c
-	.byte X86_FEATURE_REP_GOOD
-	.byte memset_c_end-memset_c
-	.byte memset_c_end-memset_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
- /* rdi	destination
- * rsi value
- * rdx count
- */
-memset_c:
-	movq %rdi,%r9
-	movl %edx,%r8d
-	andl $7,%r8d
-	movl %edx,%ecx
-	shrl $3,%ecx
-	/* expand byte value */
-	movzbl %sil,%esi
-	movabs $0x0101010101010101,%rax
-	mulq %rsi		/* with rax, clobbers rdx */
-	rep
-	stosq
-	movl %r8d,%ecx
-	rep
-	stosb
-	movq %r9,%rax
-	ret
-memset_c_end:
+	.quad memset
+	.quad 1b
+	.byte X86_FEATURE_REP_GOOD
+	.byte .Lfinal - memset
+	.byte 2b - 1b
 	.previous
diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S
index 7f5593974e2..4989f5a8fa9 100644
--- a/arch/x86_64/lib/putuser.S
+++ b/arch/x86_64/lib/putuser.S
@@ -25,25 +25,26 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/dwarf2.h>
 #include <asm/page.h>
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 
 	.text
-	.p2align 4
-.globl __put_user_1
-__put_user_1:
+ENTRY(__put_user_1)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	cmpq threadinfo_addr_limit(%r8),%rcx
 	jae bad_put_user
1:	movb %dl,(%rcx)
 	xorl %eax,%eax
 	ret
+	CFI_ENDPROC
+ENDPROC(__put_user_1)
 
-	.p2align 4
-.globl __put_user_2
-__put_user_2:
+ENTRY(__put_user_2)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $1,%rcx
 	jc 20f
@@ -55,10 +56,11 @@ __put_user_2:
 	ret
20:	decq %rcx
 	jmp bad_put_user
+	CFI_ENDPROC
+ENDPROC(__put_user_2)
 
-	.p2align 4
-.globl __put_user_4
-__put_user_4:
+ENTRY(__put_user_4)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $3,%rcx
 	jc 30f
@@ -70,10 +72,11 @@ __put_user_4:
 	ret
30:	subq $3,%rcx
 	jmp bad_put_user
+	CFI_ENDPROC
+ENDPROC(__put_user_4)
 
-	.p2align 4
-.globl __put_user_8
-__put_user_8:
+ENTRY(__put_user_8)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $7,%rcx
 	jc 40f
@@ -85,10 +88,15 @@ __put_user_8:
 	ret
40:	subq $7,%rcx
 	jmp bad_put_user
+	CFI_ENDPROC
+ENDPROC(__put_user_8)
 
 bad_put_user:
+	CFI_STARTPROC
 	movq $(-EFAULT),%rax
 	ret
+	CFI_ENDPROC
+END(bad_put_user)
 
 .section __ex_table,"a"
 	.quad 1b,bad_put_user
-- 
2.11.4.GIT
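For readers tracing the replacement-jump arithmetic this patch introduces, here is a minimal, self-contained sketch of the same pattern outside the kernel tree. It is illustrative only: func, func_fast, and the FEATURE_NR bit number are made-up placeholders (the real files use pairs like clear_page/clear_page_c and the X86_FEATURE_REP_GOOD bit), and the two generated sections are meaningful only to the kernel's boot-time alternatives patching, which copies the replacement bytes over the start of func when the CPU has the given feature.

	/* alt_sketch.S -- illustrative standalone example, not part of the patch */
	.set FEATURE_NR, 3		/* placeholder CPU feature bit number */

	.text
	.globl func
func:					/* default body; its first bytes may be patched */
	xorl	%eax,%eax
	ret
.Lfunc_end:

func_fast:				/* alternative body, kept in .text so it can
					   carry its own unwind annotations */
	movl	$1,%eax
	ret

	/* The replacement is just a 2-byte short jmp to func_fast.  After the
	   bytes are copied to func, the displacement is evaluated relative to
	   the end of the jmp at its patched location, func + (2f - 1b), which
	   is why that length is subtracted. */
	.section .altinstr_replacement,"ax"
1:	.byte	0xeb				/* jmp, 8-bit displacement */
	.byte	(func_fast - func) - (2f - 1b)	/* offset */
2:
	.previous

	.section .altinstructions,"a"
	.align	8
	.quad	func			/* instructions to patch */
	.quad	1b			/* replacement bytes */
	.byte	FEATURE_NR		/* apply only if this feature bit is set */
	.byte	.Lfunc_end - func	/* length of original */
	.byte	2b - 1b			/* length of replacement */
	.previous

This makes the point of the patch's reshuffling visible: only the two jmp bytes live in .altinstr_replacement, while both function bodies stay in .text where CFI_STARTPROC/CFI_ENDPROC (omitted in this sketch) can describe them, so unwinding works whichever body ends up executing. The short jmp does assume the alternative body sits within 127 bytes of the patch site, which holds in these files because the two versions are laid out next to each other.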