2 * arch/xtensa/lib/usercopy.S
4 * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
6 * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
7 * It needs to remain separate and distinct. The hal files are part
8 * of the Xtensa link-time HAL, and those files may differ per
9 * processor configuration. Patching the kernel for another
10 * processor configuration includes replacing the hal files, and we
11 * could lose the special functionality for accessing user-space
12 * memory during such a patch. We sacrifice a little code space here
13 * in favor of simpler code maintenance.
15 * This file is subject to the terms and conditions of the GNU General
16 * Public License. See the file "COPYING" in the main directory of
17 * this archive for more details.
19 * Copyright (C) 2002 Tensilica Inc.
24 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
26 * The returned value is the number of bytes not copied. Zero is returned on success.
29 * The general case algorithm is as follows:
30 * If the destination and source are both aligned,
31 * do 16B chunks with a loop, and then finish up with
32 * 8B, 4B, 2B, and 1B copies conditional on the length.
33 * If destination is aligned and source unaligned,
34 * do the same, but use SRC to align the source data.
35 * If destination is unaligned, align it by conditionally
36 * copying 1B and 2B and then retest.
37 * This code tries to use fall-through branches for the common
38 * case of aligned destinations (except for the branches to
39 * the alignment label).
53 * a11/ original length
56 #include <asm/variant/core.h>
/*
 * ALIGN(R, W0, W1) expands to a single src instruction that writes R from
 * the register pair W0/W1. SSA8(R) expands to an ssa8b/ssa8l instruction
 * on R (used in this file to set the shift amount from a byte offset).
 * Two variants are defined; they differ only in the src operand order and
 * in ssa8b versus ssa8l.
 * NOTE(review): presumably the first pair is the big-endian variant and the
 * second the little-endian one, selected by a preprocessor conditional --
 * confirm against the surrounding directives.
 */
59 #define ALIGN(R, W0, W1) src R, W0, W1
60 #define SSA8(R) ssa8b R
62 #define ALIGN(R, W0, W1) src R, W1, W0
63 #define SSA8(R) ssa8l R
66 /* Load or store instructions that may cause exceptions use the EX macro. */
/*
 * EX(insn, reg1, reg2, offset, handler) emits "insn reg1, reg2, offset"
 * at local label 9 and then switches to the __ex_table section.
 * NOTE(review): presumably the remainder of the macro records the address
 * of label 9 together with `handler` as an exception-table entry and then
 * switches back to the previous section -- confirm.
 */
68 #define EX(insn,reg1,reg2,offset,handler) \
69 9: insn reg1, reg2, offset; \
70 .section __ex_table, "a"; \
77 .global __xtensa_copy_user
78 .type __xtensa_copy_user,@function
/*
 * Entry and dispatch: keep a working copy of dst in a5 and the original
 * length in a11, then branch on the low address bits of dst and src to
 * select a copy strategy.
 */
80 entry sp, 16 # minimal stack frame
81 # a2/ dst, a3/ src, a4/ len
82 mov a5, a2 # a5 = working dst pointer, so that a2 can hold the return value
83 mov a11, a4 # preserve original len for error case (read by the fixup code)
85 bbsi.l a2, 0, .Ldst1mod2 # bit 0 of dst set: dst is 1 mod 2
86 bbsi.l a2, 1, .Ldst2mod4 # bit 1 of dst set: dst is 2 mod 4
87 .Ldstaligned: # re-entered from .Ldst1mod2/.Ldst2mod4 once dst is aligned
88 srli a7, a4, 4 # a7 = len >> 4 = number of loop iterations with 16B
90 movi a8, 3 # a8 = mask of the two low address bits; if source is also aligned,
91 bnone a3, a8, .Laligned # then use word copy
92 SSA8( a3) # set shift amount from byte offset
93 bnez a4, .Lsrcunaligned # len != 0: use the unaligned-source copy
94 movi a2, 0 # return success for len==0
98 * Destination is unaligned
101 .Ldst1mod2: # dst is only byte aligned
/*
 * Copy one leading byte (.Ldst1mod2) and/or two leading bytes (.Ldst2mod4)
 * and then rejoin .Ldstaligned. Lengths below the thresholds tested here
 * go to .Lbytecopy instead. Loads name l_fixup as their EX handler, stores
 * name s_fixup.
 */
102 bltui a4, 7, .Lbytecopy # len < 7: do short copies byte by byte
105 EX(l8ui, a6, a3, 0, l_fixup) # a6 = one byte from src
107 EX(s8i, a6, a5, 0, s_fixup) # store that byte to dst
110 bbci.l a5, 1, .Ldstaligned # bit 1 of dst clear: dst is now aligned, so
111 # return to main algorithm
112 .Ldst2mod4: # dst 16-bit aligned
114 bltui a4, 6, .Lbytecopy # len < 6: do short copies byte by byte
115 EX(l8ui, a6, a3, 0, l_fixup) # a6 = src byte 0
116 EX(l8ui, a7, a3, 1, l_fixup) # a7 = src byte 1
118 EX(s8i, a6, a5, 0, s_fixup) # dst byte 0 = a6
119 EX(s8i, a7, a5, 1, s_fixup) # dst byte 1 = a7
122 j .Ldstaligned # dst is now aligned, return to main algorithm
128 .byte 0 # 1 mod 4 alignment for LOOPNEZ
129 # (0 mod 4 alignment for LBEG)
/*
 * Byte-at-a-time copy of a4 bytes (presumably the .Lbytecopy target used
 * for short lengths). With hardware loops the body runs under loopnez,
 * otherwise a compare-and-branch loop runs until src (a3) reaches the end
 * address kept in a7. Both variants finish at .Lbytecopydone.
 */
132 loopnez a4, .Lbytecopydone # loop a4 times, skipped when a4 == 0
133 #else /* !XCHAL_HAVE_LOOPS */
134 beqz a4, .Lbytecopydone # nothing to copy
135 add a7, a3, a4 # a7 = end address for source
136 #endif /* !XCHAL_HAVE_LOOPS */
138 EX(l8ui, a6, a3, 0, l_fixup) # a6 = next src byte
140 EX(s8i, a6, a5, 0, s_fixup) # store it to dst
142 #if !XCHAL_HAVE_LOOPS
143 blt a3, a7, .Lnextbyte # repeat while src < end. NOTE(review): blt is a signed compare, confirm this is intended for addresses
144 #endif /* !XCHAL_HAVE_LOOPS */
146 movi a2, 0 # return success for len bytes copied
150 * Destination and source are word-aligned.
152 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
153 .align 4 # 1 mod 4 alignment for LOOPNEZ
154 .byte 0 # (0 mod 4 alignment for LBEG)
/*
 * Main loop: a7 holds the number of 16-byte chunks (len >> 4, computed at
 * .Ldstaligned). Each iteration moves four words, alternating a6 and a7 so
 * that loads and stores are interleaved.
 */
157 loopnez a7, .Loop1done # a7 iterations of 16 bytes, skipped when a7 == 0
158 #else /* !XCHAL_HAVE_LOOPS */
161 add a8, a8, a3 # a8 = end of last 16B source chunk
162 #endif /* !XCHAL_HAVE_LOOPS */
164 EX(l32i, a6, a3, 0, l_fixup) # a6 = src word 0
165 EX(l32i, a7, a3, 4, l_fixup) # a7 = src word 1
166 EX(s32i, a6, a5, 0, s_fixup) # dst word 0 = a6
167 EX(l32i, a6, a3, 8, l_fixup) # a6 = src word 2
168 EX(s32i, a7, a5, 4, s_fixup) # dst word 1 = a7
169 EX(l32i, a7, a3, 12, l_fixup) # a7 = src word 3
170 EX(s32i, a6, a5, 8, s_fixup) # dst word 2 = a6
172 EX(s32i, a7, a5, 12, s_fixup) # dst word 3 = a7
174 #if !XCHAL_HAVE_LOOPS
176 #endif /* !XCHAL_HAVE_LOOPS */
/*
 * Tail: 8-byte, 4-byte, 2-byte and 1-byte copy steps, in that order.
 * NOTE(review): presumably each step is conditional on the matching bit of
 * len, as the file header describes -- confirm against the branch
 * instructions around these copies.
 */
180 EX(l32i, a6, a3, 0, l_fixup) # 8-byte step: a6 = src word 0
181 EX(l32i, a7, a3, 4, l_fixup) # a7 = src word 1
183 EX(s32i, a6, a5, 0, s_fixup) # dst word 0 = a6
184 EX(s32i, a7, a5, 4, s_fixup) # dst word 1 = a7
189 EX(l32i, a6, a3, 0, l_fixup) # 4-byte step: load one word
191 EX(s32i, a6, a5, 0, s_fixup) # store it
196 EX(l16ui, a6, a3, 0, l_fixup) # 2-byte step: load one halfword
198 EX(s16i, a6, a5, 0, s_fixup) # store it
203 EX(l8ui, a6, a3, 0, l_fixup) # 1-byte step: load one byte
204 EX(s8i, a6, a5, 0, s_fixup) # store it
206 movi a2, 0 # return success for len bytes copied
210 * Destination is aligned, Source is unaligned
214 .byte 0 # 1 mod 4 alignment for LOOPNEZ
215 # (0 mod 4 alignment for LBEG)
217 # copy 16 bytes per iteration for word-aligned dst and unaligned src
/*
 * a3 is rounded down to a word boundary and the dropped byte offset is kept
 * in a10. Words are then loaded from the aligned address and stored to dst,
 * with a6 carrying the last loaded word from one 16-byte chunk into the
 * next. After the word-sized steps the offset in a10 is added back to a3
 * before the final byte copies.
 */
218 and a10, a3, a8 # a10 = a3 & a8, save unalignment offset for below (a8 is expected to still hold the low-bit mask from .Ldstaligned)
219 sub a3, a3, a10 # align a3 (to avoid sim warnings only; not needed for hardware)
220 EX(l32i, a6, a3, 0, l_fixup) # load first word
222 loopnez a7, .Loop2done # a7 iterations of 16 bytes, skipped when a7 == 0
223 #else /* !XCHAL_HAVE_LOOPS */
/*
 * NOTE(review): a10 still holds the unalignment offset saved above, and
 * that offset is added back to a3 at the "readjust a3" step further down.
 * Reusing a10 as the loop end address here looks like it corrupts that
 * value on !XCHAL_HAVE_LOOPS builds -- confirm, and consider a separate
 * scratch register for the loop bound.
 */
226 add a10, a10, a3 # a10 = end of last 16B source chunk
227 #endif /* !XCHAL_HAVE_LOOPS */
229 EX(l32i, a7, a3, 4, l_fixup) # a7 = aligned src word at +4
230 EX(l32i, a8, a3, 8, l_fixup) # a8 = aligned src word at +8 (overwrites the mask)
232 EX(s32i, a6, a5, 0, s_fixup) # dst word 0 = a6
233 EX(l32i, a9, a3, 12, l_fixup) # a9 = aligned src word at +12
235 EX(s32i, a7, a5, 4, s_fixup) # dst word 1 = a7
236 EX(l32i, a6, a3, 16, l_fixup) # a6 = aligned src word at +16, carried into the next chunk
238 EX(s32i, a8, a5, 8, s_fixup) # dst word 2 = a8
241 EX(s32i, a9, a5, 12, s_fixup) # dst word 3 = a9
243 #if !XCHAL_HAVE_LOOPS
245 #endif /* !XCHAL_HAVE_LOOPS */
249 EX(l32i, a7, a3, 4, l_fixup) # 8-byte step: a7 = aligned src word at +4
250 EX(l32i, a8, a3, 8, l_fixup) # a8 = aligned src word at +8
252 EX(s32i, a6, a5, 0, s_fixup) # dst word 0 = a6
255 EX(s32i, a7, a5, 4, s_fixup) # dst word 1 = a7
261 EX(l32i, a7, a3, 4, l_fixup) # 4-byte step: a7 = aligned src word at +4
264 EX(s32i, a6, a5, 0, s_fixup) # dst word 0 = a6
268 add a3, a3, a10 # readjust a3 with correct misalignment
271 EX(l8ui, a6, a3, 0, l_fixup) # 2-byte step, done bytewise: a6 = src byte 0
272 EX(l8ui, a7, a3, 1, l_fixup) # a7 = src byte 1
274 EX(s8i, a6, a5, 0, s_fixup) # dst byte 0 = a6
275 EX(s8i, a7, a5, 1, s_fixup) # dst byte 1 = a7
280 EX(l8ui, a6, a3, 0, l_fixup) # 1-byte step: load one byte
281 EX(s8i, a6, a5, 0, s_fixup) # store it
283 movi a2, 0 # return success for len bytes copied
287 .section .fixup, "ax"
/*
 * Exception fixup code, placed in the .fixup section. Both instruction
 * pairs below turn the progress made so far into the "bytes not copied"
 * return value in a2.
 */
290 /* a2 = original dst; a5 = current dst; a11= original len
291 * bytes_copied = a5 - a2
292 * retval = bytes_not_copied = original len - bytes_copied
293 * retval = a11 - (a5 - a2)
295 * Clearing the remaining pieces of kernel memory plugs security
296 * holes. This functionality is the equivalent of the *_zeroing
297 * functions that some architectures provide.
304 sub a2, a5, a2 /* a2 <-- a5 - a2 = bytes copied so far */
305 sub a2, a11, a2 /* a2 <-- a11 - bytes copied = bytes not copied */
/*
 * NOTE(review): presumably the pair above belongs to s_fixup and the pair
 * below to l_fixup, since only the second one leads into the memset-based
 * clearing -- confirm against the handler labels.
 */
309 sub a2, a5, a2 /* a2 <-- a5 - a2 = bytes copied so far */
310 sub a2, a11, a2 /* a2 <-- a11 - bytes copied = bytes not copied == return value */
/* Prototype of the routine used to clear the part of dst that was not copied. */
312 /* void *memset(void *s, int c, size_t n); */
318 /* Ignore memset return value in a6. */
319 /* a2 still contains bytes not copied. */