From 8d379dad8f1670d233ac67b76b1c5a42ad3714a3 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Tue, 26 Sep 2006 10:52:32 +0200
Subject: [PATCH] [PATCH] annotate arch/x86_64/lib/*.S

Add unwind annotations to arch/x86_64/lib/*.S, and also use the macros
provided by linux/linkage.h wherever possible.

Some of the alternative instructions handling needed to be adjusted so
that the replacement code would also have valid unwind information.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/lib/clear_page.S | 47 +++++++++++++++-----------
 arch/x86_64/lib/copy_page.S  | 53 +++++++++++++++++++----------
 arch/x86_64/lib/copy_user.S  | 39 ++++++++++++++++------
 arch/x86_64/lib/csum-copy.S  | 26 ++++++++++++---
 arch/x86_64/lib/getuser.S    | 32 +++++++++++-------
 arch/x86_64/lib/iomap_copy.S | 10 ++++--
 arch/x86_64/lib/memcpy.S     | 69 +++++++++++++++++++++-----------------
 arch/x86_64/lib/memset.S     | 79 ++++++++++++++++++++++++--------------------
 arch/x86_64/lib/putuser.S    | 32 +++++++++++-------
 9 files changed, 244 insertions(+), 143 deletions(-)

diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
index 1f81b79b796..9a10a78bb4a 100644
--- a/arch/x86_64/lib/clear_page.S
+++ b/arch/x86_64/lib/clear_page.S
@@ -1,10 +1,22 @@
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
 /*
  * Zero a page.
  * rdi	page
  */
-	.globl clear_page
-	.p2align 4
-clear_page:
+	ALIGN
+clear_page_c:
+	CFI_STARTPROC
+	movl $4096/8,%ecx
+	xorl %eax,%eax
+	rep stosq
+	ret
+	CFI_ENDPROC
+ENDPROC(clear_page_c)
+
+ENTRY(clear_page)
+	CFI_STARTPROC
 	xorl   %eax,%eax
 	movl   $4096/64,%ecx
 	.p2align 4
@@ -23,28 +35,25 @@ clear_page:
 	jnz	.Lloop
 	nop
 	ret
-clear_page_end:
+	CFI_ENDPROC
+.Lclear_page_end:
+ENDPROC(clear_page)
 
 	/* Some CPUs run faster using the string instructions.
 	   It is also a lot simpler. Use this when possible */
 
 #include <asm/cpufeature.h>
 
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb					/* jmp */
+	.byte (clear_page_c - clear_page) - (2f - 1b)	/* offset */
+2:
+	.previous
 	.section .altinstructions,"a"
 	.align 8
-	.quad  clear_page
-	.quad  clear_page_c
-	.byte  X86_FEATURE_REP_GOOD
-	.byte  clear_page_end-clear_page
-	.byte  clear_page_c_end-clear_page_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
-clear_page_c:
-	movl $4096/8,%ecx
-	xorl %eax,%eax
-	rep
-	stosq
-	ret
-clear_page_c_end:
+	.quad clear_page
+	.quad 1b
+	.byte X86_FEATURE_REP_GOOD
+	.byte .Lclear_page_end - clear_page
+	.byte 2b - 1b
 	.previous
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
index 8fa19d96a7e..0ebb03b60e7 100644
--- a/arch/x86_64/lib/copy_page.S
+++ b/arch/x86_64/lib/copy_page.S
@@ -1,17 +1,33 @@
 /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
 
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+	ALIGN
+copy_page_c:
+	CFI_STARTPROC
+	movl $4096/8,%ecx
+	rep movsq
+	ret
+	CFI_ENDPROC
+ENDPROC(copy_page_c)
+
 /* Don't use streaming store because it's better when the
    target ends up in cache. */
 
 /* Could vary the prefetch distance based on SMP/UP */
 
-	.globl copy_page
-	.p2align 4
-copy_page:
+ENTRY(copy_page)
+	CFI_STARTPROC
 	subq	$3*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 3*8
 	movq	%rbx,(%rsp)
+	CFI_REL_OFFSET rbx, 0
 	movq	%r12,1*8(%rsp)
+	CFI_REL_OFFSET r12, 1*8
 	movq	%r13,2*8(%rsp)
+	CFI_REL_OFFSET r13, 2*8
 
 	movl	$(4096/64)-5,%ecx
 	.p2align 4
@@ -72,30 +88,33 @@ copy_page:
 	jnz	.Loop2
 
 	movq	(%rsp),%rbx
+	CFI_RESTORE rbx
 	movq	1*8(%rsp),%r12
+	CFI_RESTORE r12
 	movq	2*8(%rsp),%r13
+	CFI_RESTORE r13
 	addq	$3*8,%rsp
+	CFI_ADJUST_CFA_OFFSET -3*8
 	ret
+.Lcopy_page_end:
+	CFI_ENDPROC
+ENDPROC(copy_page)
 
 	/* Some CPUs run faster using the string copy instructions.
 	   It is also a lot simpler. Use this when possible */
 
 #include <asm/cpufeature.h>
 
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb					/* jmp */
+	.byte (copy_page_c - copy_page) - (2f - 1b)	/* offset */
+2:
+	.previous
 	.section .altinstructions,"a"
 	.align 8
-	.quad copy_page
-	.quad copy_page_c
-	.byte X86_FEATURE_REP_GOOD
-	.byte copy_page_c_end-copy_page_c
-	.byte copy_page_c_end-copy_page_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
-copy_page_c:
-	movl $4096/8,%ecx
-	rep
-	movsq
-	ret
-copy_page_c_end:
+	.quad copy_page
+	.quad 1b
+	.byte X86_FEATURE_REP_GOOD
+	.byte .Lcopy_page_end - copy_page
+	.byte 2b - 1b
 	.previous
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
index f64569b83b5..962f3a693c5 100644
--- a/arch/x86_64/lib/copy_user.S
+++ b/arch/x86_64/lib/copy_user.S
@@ -4,6 +4,9 @@
  * Functions to copy from and to user space.
  */
 
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
 #define FIX_ALIGNMENT 1
 
 #include <asm/current.h>
@@ -12,9 +15,8 @@
 #include <asm/cpufeature.h>
 
 /* Standard copy_to_user with segment limit checking */
-	.globl copy_to_user
-	.p2align 4
-copy_to_user:
+ENTRY(copy_to_user)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%rax)
 	movq %rdi,%rcx
 	addq %rdx,%rcx
@@ -25,9 +27,11 @@ copy_to_user:
 	.byte 0xe9	/* 32bit jump */
 	.long .Lcug-1f
 1:
+	CFI_ENDPROC
+ENDPROC(copy_to_user)
 
 	.section .altinstr_replacement,"ax"
-3:	.byte 0xe9			/* replacement jmp with 8 bit immediate */
+3:	.byte 0xe9			/* replacement jmp with 32 bit immediate */
 	.long copy_user_generic_c-1b	/* offset */
 	.previous
 	.section .altinstructions,"a"
@@ -40,9 +44,8 @@ copy_to_user:
 	.previous
 
 /* Standard copy_from_user with segment limit checking */
-	.globl copy_from_user
-	.p2align 4
-copy_from_user:
+ENTRY(copy_from_user)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%rax)
 	movq %rsi,%rcx
 	addq %rdx,%rcx
@@ -50,10 +53,13 @@ copy_from_user:
 	cmpq threadinfo_addr_limit(%rax),%rcx
 	jae bad_from_user
 	/* FALL THROUGH to copy_user_generic */
+	CFI_ENDPROC
+ENDPROC(copy_from_user)
 
 	.section .fixup,"ax"
 	/* must zero dest */
 bad_from_user:
+	CFI_STARTPROC
 	movl %edx,%ecx
 	xorl %eax,%eax
 	rep
@@ -61,6 +67,8 @@ bad_from_user:
 bad_to_user:
 	movl %edx,%eax
 	ret
+	CFI_ENDPROC
+END(bad_from_user)
 	.previous
 
@@ -75,9 +83,8 @@ bad_to_user:
  * Output:
  *  eax uncopied bytes or 0 if successful.
  */
-	.globl copy_user_generic
-	.p2align 4
-copy_user_generic:
+ENTRY(copy_user_generic)
+	CFI_STARTPROC
 	.byte 0x66,0x66,0x90	/* 5 byte nop for replacement jump */
 	.byte 0x66,0x90
 1:
@@ -95,6 +102,8 @@ copy_user_generic:
 	.previous
 .Lcug:
 	pushq %rbx
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET rbx, 0
 	xorl %eax,%eax	/*zero for the exception handler */
 
 #ifdef FIX_ALIGNMENT
@@ -168,9 +177,13 @@ copy_user_generic:
 	decl %ecx
 	jnz .Lloop_1
 
+	CFI_REMEMBER_STATE
 .Lende:
 	popq %rbx
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE rbx
 	ret
+	CFI_RESTORE_STATE
 
 #ifdef FIX_ALIGNMENT
 	/* align destination */
@@ -261,6 +274,9 @@ copy_user_generic:
 .Le_zero:
 	movq %rdx,%rax
 	jmp .Lende
+	CFI_ENDPROC
+ENDPROC(copy_user_generic)
+
 
 /* Some CPUs run faster using the string copy instructions.
    This is also a lot simpler. Use them when possible.
@@ -282,6 +298,7 @@ copy_user_generic:
  * this please consider this.
  */
 copy_user_generic_c:
+	CFI_STARTPROC
 	movl %edx,%ecx
 	shrl $3,%ecx
 	andl $7,%edx
@@ -294,6 +311,8 @@ copy_user_generic_c:
 	ret
 3:	lea (%rdx,%rcx,8),%rax
 	ret
+	CFI_ENDPROC
+END(copy_user_generic_c)
 
 	.section __ex_table,"a"
 	.quad 1b,3b
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
index 72fd55ee896..f0dba36578e 100644
--- a/arch/x86_64/lib/csum-copy.S
+++ b/arch/x86_64/lib/csum-copy.S
@@ -5,8 +5,9 @@
  * License.  See the file COPYING in the main directory of this archive
  * for more details. No warranty for anything given at all.
  */
-	#include <linux/config.h>
-	#include <asm/errno.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/errno.h>
 
 /*
  * Checksum copy with exception handling.
@@ -53,19 +54,24 @@
 	.endm
 
 
-	.globl csum_partial_copy_generic
-	.p2align 4
-csum_partial_copy_generic:
+ENTRY(csum_partial_copy_generic)
+	CFI_STARTPROC
 	cmpl	 $3*64,%edx
 	jle	 .Lignore
 
 .Lignore:
 	subq  $7*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 7*8
 	movq  %rbx,2*8(%rsp)
+	CFI_REL_OFFSET rbx, 2*8
 	movq  %r12,3*8(%rsp)
+	CFI_REL_OFFSET r12, 3*8
 	movq  %r14,4*8(%rsp)
+	CFI_REL_OFFSET r14, 4*8
 	movq  %r13,5*8(%rsp)
+	CFI_REL_OFFSET r13, 5*8
 	movq  %rbp,6*8(%rsp)
+	CFI_REL_OFFSET rbp, 6*8
 
 	movq  %r8,(%rsp)
 	movq  %r9,1*8(%rsp)
@@ -208,14 +214,22 @@ csum_partial_copy_generic:
 	addl %ebx,%eax
 	adcl %r9d,%eax		/* carry */
 
+	CFI_REMEMBER_STATE
.Lende:
 	movq 2*8(%rsp),%rbx
+	CFI_RESTORE rbx
 	movq 3*8(%rsp),%r12
+	CFI_RESTORE r12
 	movq 4*8(%rsp),%r14
+	CFI_RESTORE r14
 	movq 5*8(%rsp),%r13
+	CFI_RESTORE r13
 	movq 6*8(%rsp),%rbp
+	CFI_RESTORE rbp
 	addq $7*8,%rsp
+	CFI_ADJUST_CFA_OFFSET -7*8
 	ret
+	CFI_RESTORE_STATE
 
 	/* Exception handlers. Very simple, zeroing is done in the wrappers */
 .Lbad_source:
@@ -231,3 +245,5 @@ csum_partial_copy_generic:
 	jz   .Lende
 	movl $-EFAULT,(%rax)
 	jmp  .Lende
+	CFI_ENDPROC
+ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S
index 3844d5e885a..5448876261f 100644
--- a/arch/x86_64/lib/getuser.S
+++ b/arch/x86_64/lib/getuser.S
@@ -27,25 +27,26 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/dwarf2.h>
 #include <asm/page.h>
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 
 	.text
-	.p2align 4
-.globl __get_user_1
-__get_user_1:
+ENTRY(__get_user_1)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	cmpq threadinfo_addr_limit(%r8),%rcx
 	jae bad_get_user
1:	movzb (%rcx),%edx
 	xorl %eax,%eax
 	ret
+	CFI_ENDPROC
+ENDPROC(__get_user_1)
 
-	.p2align 4
-.globl __get_user_2
-__get_user_2:
+ENTRY(__get_user_2)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $1,%rcx
 	jc 20f
@@ -57,10 +58,11 @@ __get_user_2:
 	ret
20:	decq %rcx
 	jmp bad_get_user
+	CFI_ENDPROC
+ENDPROC(__get_user_2)
 
-	.p2align 4
-.globl __get_user_4
-__get_user_4:
+ENTRY(__get_user_4)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $3,%rcx
 	jc 30f
@@ -72,10 +74,11 @@ __get_user_4:
 	ret
30:	subq $3,%rcx
 	jmp bad_get_user
+	CFI_ENDPROC
+ENDPROC(__get_user_4)
 
-	.p2align 4
-.globl __get_user_8
-__get_user_8:
+ENTRY(__get_user_8)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $7,%rcx
 	jc 40f
@@ -87,11 +90,16 @@ __get_user_8:
 	ret
40:	subq $7,%rcx
 	jmp bad_get_user
+	CFI_ENDPROC
+ENDPROC(__get_user_8)
 
 bad_get_user:
+	CFI_STARTPROC
 	xorl %edx,%edx
 	movq $(-EFAULT),%rax
 	ret
+	CFI_ENDPROC
+END(bad_get_user)
 
 .section __ex_table,"a"
 	.quad 1b,bad_get_user
diff --git a/arch/x86_64/lib/iomap_copy.S b/arch/x86_64/lib/iomap_copy.S
index 8bbade5fea0..05a95e713da 100644
--- a/arch/x86_64/lib/iomap_copy.S
+++ b/arch/x86_64/lib/iomap_copy.S
@@ -15,12 +15,16 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
 /*
  * override generic version in lib/iomap_copy.c
  */
-	.globl __iowrite32_copy
-	.p2align 4
-__iowrite32_copy:
+ENTRY(__iowrite32_copy)
+	CFI_STARTPROC
 	movl %edx,%ecx
 	rep movsd
 	ret
+	CFI_ENDPROC
+ENDPROC(__iowrite32_copy)
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index 5554948b555..967b22fa7d0 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -1,6 +1,10 @@
 /* Copyright 2002 Andi Kleen */
 
-	#include <linux/config.h>
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+
 /*
  * memcpy - Copy a memory block.
  *
@@ -13,12 +17,26 @@
  * rax original destination
  */
 
-	.globl __memcpy
-	.globl memcpy
-	.p2align 4
-__memcpy:
-memcpy:
+	ALIGN
+memcpy_c:
+	CFI_STARTPROC
+	movq %rdi,%rax
+	movl %edx,%ecx
+	shrl $3,%ecx
+	andl $7,%edx
+	rep movsq
+	movl %edx,%ecx
+	rep movsb
+	ret
+	CFI_ENDPROC
+ENDPROC(memcpy_c)
+
+ENTRY(__memcpy)
+ENTRY(memcpy)
+	CFI_STARTPROC
 	pushq %rbx
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET rbx, 0
 	movq %rdi,%rax
 
 	movl %edx,%ecx
@@ -86,36 +104,27 @@ memcpy:
 
 .Lende:
 	popq %rbx
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE rbx
 	ret
 .Lfinal:
+	CFI_ENDPROC
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
 
 	/* Some CPUs run faster using the string copy instructions.
 	   It is also a lot simpler. Use this when possible */
 
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb				/* jmp */
+	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
+2:
+	.previous
 	.section .altinstructions,"a"
 	.align 8
-	.quad memcpy
-	.quad memcpy_c
-	.byte X86_FEATURE_REP_GOOD
-	.byte .Lfinal-memcpy
-	.byte memcpy_c_end-memcpy_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
- /* rdi	destination
- * rsi source
- * rdx count
- */
-memcpy_c:
-	movq %rdi,%rax
-	movl %edx,%ecx
-	shrl $3,%ecx
-	andl $7,%edx
-	rep
-	movsq
-	movl %edx,%ecx
-	rep
-	movsb
-	ret
-memcpy_c_end:
+	.quad memcpy
+	.quad 1b
+	.byte X86_FEATURE_REP_GOOD
+	.byte .Lfinal - memcpy
+	.byte 2b - 1b
 	.previous
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index ad397f2c7de..09ed1f6b0ea 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -1,4 +1,9 @@
 /* Copyright 2002 Andi Kleen, SuSE Labs */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
 /*
  * ISO C memset - set a memory block to a byte value.
  *
@@ -8,11 +13,29 @@
  *
  * rax	original destination
  */
-	.globl __memset
-	.globl memset
-	.p2align 4
-memset:
-__memset:
+	ALIGN
+memset_c:
+	CFI_STARTPROC
+	movq %rdi,%r9
+	movl %edx,%r8d
+	andl $7,%r8d
+	movl %edx,%ecx
+	shrl $3,%ecx
+	/* expand byte value */
+	movzbl %sil,%esi
+	movabs $0x0101010101010101,%rax
+	mulq %rsi		/* with rax, clobbers rdx */
+	rep stosq
+	movl %r8d,%ecx
+	rep stosb
+	movq %r9,%rax
+	ret
+	CFI_ENDPROC
+ENDPROC(memset_c)
+
+ENTRY(memset)
+ENTRY(__memset)
+	CFI_STARTPROC
 	movq %rdi,%r10
 	movq %rdx,%r11
 
@@ -25,6 +48,7 @@ __memset:
 	movl  %edi,%r9d
 	andl  $7,%r9d
 	jnz  .Lbad_alignment
+	CFI_REMEMBER_STATE
.Lafter_bad_alignment:
 
 	movl %r11d,%ecx
@@ -75,6 +99,7 @@ __memset:
 
 	movq	%r10,%rax
 	ret
+	CFI_RESTORE_STATE
.Lbad_alignment:
 	cmpq $7,%r11
 	jbe	.Lhandle_7
@@ -84,42 +109,26 @@ __memset:
 	addq %r8,%rdi
 	subq %r8,%r11
 	jmp .Lafter_bad_alignment
+.Lfinal:
+	CFI_ENDPROC
+ENDPROC(memset)
+ENDPROC(__memset)
 
 	/* Some CPUs run faster using the string instructions.
 	   It is also a lot simpler. Use this when possible */
 
 #include <asm/cpufeature.h>
 
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb				/* jmp */
+	.byte (memset_c - memset) - (2f - 1b)	/* offset */
+2:
+	.previous
 	.section .altinstructions,"a"
 	.align 8
-	.quad memset
-	.quad memset_c
-	.byte X86_FEATURE_REP_GOOD
-	.byte memset_c_end-memset_c
-	.byte memset_c_end-memset_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
- /* rdi	destination
- * rsi value
- * rdx count
- */
-memset_c:
-	movq %rdi,%r9
-	movl %edx,%r8d
-	andl $7,%r8d
-	movl %edx,%ecx
-	shrl $3,%ecx
-	/* expand byte value */
-	movzbl %sil,%esi
-	movabs $0x0101010101010101,%rax
-	mulq %rsi		/* with rax, clobbers rdx */
-	rep
-	stosq
-	movl %r8d,%ecx
-	rep
-	stosb
-	movq %r9,%rax
-	ret
-memset_c_end:
+	.quad memset
+	.quad 1b
+	.byte X86_FEATURE_REP_GOOD
+	.byte .Lfinal - memset
+	.byte 2b - 1b
 	.previous
diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S
index 7f5593974e2..4989f5a8fa9 100644
--- a/arch/x86_64/lib/putuser.S
+++ b/arch/x86_64/lib/putuser.S
@@ -25,25 +25,26 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/dwarf2.h>
 #include <asm/page.h>
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 
 	.text
-	.p2align 4
-.globl __put_user_1
-__put_user_1:
+ENTRY(__put_user_1)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	cmpq threadinfo_addr_limit(%r8),%rcx
 	jae bad_put_user
1:	movb %dl,(%rcx)
 	xorl %eax,%eax
 	ret
+	CFI_ENDPROC
+ENDPROC(__put_user_1)
 
-	.p2align 4
-.globl __put_user_2
-__put_user_2:
+ENTRY(__put_user_2)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $1,%rcx
 	jc 20f
@@ -55,10 +56,11 @@ __put_user_2:
 	ret
20:	decq %rcx
 	jmp bad_put_user
+	CFI_ENDPROC
+ENDPROC(__put_user_2)
 
-	.p2align 4
-.globl __put_user_4
-__put_user_4:
+ENTRY(__put_user_4)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $3,%rcx
 	jc 30f
@@ -70,10 +72,11 @@ __put_user_4:
 	ret
30:	subq $3,%rcx
 	jmp bad_put_user
+	CFI_ENDPROC
+ENDPROC(__put_user_4)
 
-	.p2align 4
-.globl __put_user_8
-__put_user_8:
+ENTRY(__put_user_8)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $7,%rcx
 	jc 40f
@@ -85,10 +88,15 @@ __put_user_8:
 	ret
40:	subq $7,%rcx
 	jmp bad_put_user
+	CFI_ENDPROC
+ENDPROC(__put_user_8)
 
 bad_put_user:
+	CFI_STARTPROC
 	movq $(-EFAULT),%rax
 	ret
+	CFI_ENDPROC
+END(bad_put_user)
 
 .section __ex_table,"a"
 	.quad 1b,bad_put_user
-- 
2.11.4.GIT
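For readers tracing the replacement-jump arithmetic this patch introduces, here is a minimal, self-contained sketch of the same pattern outside the kernel tree. It is illustrative only: func, func_fast, and the FEATURE_NR bit number are made-up placeholders (the real files use pairs like clear_page/clear_page_c and the X86_FEATURE_REP_GOOD bit), and the two generated sections are meaningful only to the kernel's boot-time alternatives patching, which copies the replacement bytes over the start of func when the CPU has the given feature.

	/* alt_sketch.S -- illustrative standalone example, not part of the patch */
	.set FEATURE_NR, 3		/* placeholder CPU feature bit number */

	.text
	.globl func
func:					/* default body; its first bytes may be patched */
	xorl	%eax,%eax
	ret
.Lfunc_end:

func_fast:				/* alternative body, kept in .text so it can
					   carry its own unwind annotations */
	movl	$1,%eax
	ret

	/* The replacement is just a 2-byte short jmp to func_fast.  After the
	   bytes are copied to func, the displacement is evaluated relative to
	   the end of the jmp at its patched location, func + (2f - 1b), which
	   is why that length is subtracted. */
	.section .altinstr_replacement,"ax"
1:	.byte	0xeb				/* jmp, 8-bit displacement */
	.byte	(func_fast - func) - (2f - 1b)	/* offset */
2:
	.previous

	.section .altinstructions,"a"
	.align	8
	.quad	func			/* instructions to patch */
	.quad	1b			/* replacement bytes */
	.byte	FEATURE_NR		/* apply only if this feature bit is set */
	.byte	.Lfunc_end - func	/* length of original */
	.byte	2b - 1b			/* length of replacement */
	.previous

This makes the point of the patch's reshuffling visible: only the two jmp bytes live in .altinstr_replacement, while both function bodies stay in .text where CFI_STARTPROC/CFI_ENDPROC (omitted in this sketch) can describe them, so unwinding works whichever body ends up executing. The short jmp does assume the alternative body sits within 127 bytes of the patch site, which holds in these files because the two versions are laid out next to each other.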