/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * $Id: memcpy.S,v 1.3 1998/07/10 01:14:49 ralf Exp $
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.  In order to keep the exception fixup routine
 * simple, all memory accesses in __copy_user to both src and dst are
 * strictly incremental.  The fixup routine depends on $at not being
 * changed.
 */
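/*
 * Illustration of the contract above (a hypothetical fault, values are
 * made up for the example): with $at preloaded to src + len, a load
 * faulting at src + k implies that every access below src + k has
 * already completed, because all accesses ascend.  The fixup can thus
 * recover "bytes left" as $at - badvaddr = len - k with no per-loop
 * bookkeeping; see l_fixup at the end of this file.
 */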
#include <asm/offset.h>
#include <asm/regdef.h>
/*
 * The fixup routine for copy_to_user depends on copying strictly in
 * increasing order.  Gas expands the ulw/usw macros in the wrong order for
 * little endian machines, so we cannot depend on them.
 */
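/*
 * Explicit per-endianness halves for the unaligned accesses.  These
 * mappings are implied by the UEX macro below, which expands
 * insn ## L for the access touching the lower address and insn ## U
 * for the one touching addr + 3; the define block is reconstructed
 * here on that assumption.
 */
#ifdef __MIPSEB__
#define uswL	swl
#define uswU	swr
#define ulwL	lwl
#define ulwU	lwr
#endif
#ifdef __MIPSEL__
#define uswL	swr
#define uswU	swl
#define ulwL	lwr
#define ulwU	lwl
#endif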
#define EX(insn,reg,addr,handler)			\
9:	insn	reg, addr;				\
	.section __ex_table,"a";			\
	.word	9b, handler;				\
	.previous

#define UEX(insn,reg,addr,handler)			\
9:	insn ## L reg, addr;				\
10:	insn ## U reg, 3 + addr;			\
	.section __ex_table,"a";			\
	.word	9b, handler;				\
	.word	10b, handler;				\
	.previous
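/*
 * Example expansion (little-endian build, illustrative only):
 *
 *	UEX(usw, t0, 0(a0), s_fixup)
 *
 * becomes
 *
 * 9:	swr	t0, 0(a0)
 * 10:	swl	t0, 3 + 0(a0)
 *	.section __ex_table,"a"
 *	.word	9b, s_fixup
 *	.word	10b, s_fixup
 *	.previous
 *
 * i.e. the access touching the lowest address comes first and both
 * halves get their own exception table entry pointing at s_fixup.
 */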
/* ascending order, destination aligned */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
	EX(lw, t0, (offset + 0x00)(src), l_fixup); \
	EX(lw, t1, (offset + 0x04)(src), l_fixup); \
	EX(lw, t2, (offset + 0x08)(src), l_fixup); \
	EX(lw, t3, (offset + 0x0c)(src), l_fixup); \
	EX(sw, t0, (offset + 0x00)(dst), s_fixup); \
	EX(sw, t1, (offset + 0x04)(dst), s_fixup); \
	EX(sw, t2, (offset + 0x08)(dst), s_fixup); \
	EX(sw, t3, (offset + 0x0c)(dst), s_fixup); \
	EX(lw, t0, (offset + 0x10)(src), l_fixup); \
	EX(lw, t1, (offset + 0x14)(src), l_fixup); \
	EX(lw, t2, (offset + 0x18)(src), l_fixup); \
	EX(lw, t3, (offset + 0x1c)(src), l_fixup); \
	EX(sw, t0, (offset + 0x10)(dst), s_fixup); \
	EX(sw, t1, (offset + 0x14)(dst), s_fixup); \
	EX(sw, t2, (offset + 0x18)(dst), s_fixup); \
	EX(sw, t3, (offset + 0x1c)(dst), s_fixup)
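/*
 * One MOVE_BIGCHUNK invocation copies 32 bytes in two 16-byte waves;
 * the four loads of a wave are issued before its four stores so the
 * loaded values have time to arrive before they are consumed.
 */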
/* ascending order, destination unaligned */
#define UMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
	EX(lw, t0, (offset + 0x00)(src), l_fixup); \
	EX(lw, t1, (offset + 0x04)(src), l_fixup); \
	EX(lw, t2, (offset + 0x08)(src), l_fixup); \
	EX(lw, t3, (offset + 0x0c)(src), l_fixup); \
	UEX(usw, t0, (offset + 0x00)(dst), s_fixup); \
	UEX(usw, t1, (offset + 0x04)(dst), s_fixup); \
	UEX(usw, t2, (offset + 0x08)(dst), s_fixup); \
	UEX(usw, t3, (offset + 0x0c)(dst), s_fixup); \
	EX(lw, t0, (offset + 0x10)(src), l_fixup); \
	EX(lw, t1, (offset + 0x14)(src), l_fixup); \
	EX(lw, t2, (offset + 0x18)(src), l_fixup); \
	EX(lw, t3, (offset + 0x1c)(src), l_fixup); \
	UEX(usw, t0, (offset + 0x10)(dst), s_fixup); \
	UEX(usw, t1, (offset + 0x14)(dst), s_fixup); \
	UEX(usw, t2, (offset + 0x18)(dst), s_fixup); \
	UEX(usw, t3, (offset + 0x1c)(dst), s_fixup)
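/*
 * Note that only the stores use UEX here: the unaligned-source path
 * below aligns a1 before entering the big loops, so the loads can stay
 * plain (aligned) lw while the destination may still be misaligned.
 */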
LEAF(memcpy)					/* a0=dst a1=src a2=len */
	move	v0, a0				/* return value */
	b	memcpy_u_src			# bad alignment
	bnez	t8, small_memcpy		# < 8 bytes to copy
	EX(lb, t0, (a1), l_fixup)
	EX(sb, t0, (a0), s_fixup)
	EX(lh, t0, (a1), l_fixup)
	EX(sh, t0, (a0), s_fixup)
	bnez	t8, do_end_words
	EX(lw, t0, 0x00(a1), l_fixup)
	EX(sw, t0, 0x00(a0), s_fixup)
	EX(lw, t0, 0x00(a1), l_fixup)
	EX(lw, t1, 0x04(a1), l_fixup)
	EX(sw, t0, 0x00(a0), s_fixup)
	EX(sw, t1, 0x04(a0), s_fixup)
	beqz	t8, begin_movement
	EX(lw, t3, 0x00(a1), l_fixup)
	EX(lw, t4, 0x04(a1), l_fixup)
	EX(lw, t0, 0x08(a1), l_fixup)
	EX(lw, t1, 0x0c(a1), l_fixup)
	EX(sw, t3, 0x00(a0), s_fixup)
	EX(sw, t4, 0x04(a0), s_fixup)
	EX(sw, t0, 0x08(a0), s_fixup)
	EX(sw, t1, 0x0c(a0), s_fixup)
	MOVE_BIGCHUNK(a1, a0, 0x00, t0, t1, t3, t4)
	MOVE_BIGCHUNK(a1, a0, 0x20, t0, t1, t3, t4)
	MOVE_BIGCHUNK(a1, a0, 0x40, t0, t1, t3, t4)
	MOVE_BIGCHUNK(a1, a0, 0x60, t0, t1, t3, t4)
	bnez	t8, move_128bytes
	MOVE_BIGCHUNK(a1, a0, 0x00, t0, t1, t3, t4)
	MOVE_BIGCHUNK(a1, a0, 0x20, t0, t1, t3, t4)
	beqz	t2, do_end_words
	MOVE_BIGCHUNK(a1, a0, 0x00, t0, t1, t3, t4)
	beqz	t8, maybe_end_cruft
	EX(lw, t0, (a1), l_fixup)
	EX(sw, t0, (a0), s_fixup)
	EX(lb, t0, (a1), l_fixup)
	EX(sb, t0, (a0), s_fixup)
/* ------------------------------------------------------------------------- */

/* Bad, bad.  At least try to align the source */
	bnez	t8, small_memcpy		# < 8 bytes?
	addiu	t0, a1, 7			# t0: how much to align
	UEX(ulw, t1, 0(a1), l_fixup)		# dword alignment
	UEX(ulw, t2, 4(a1), l_fixup)
	UEX(usw, t1, 0(a0), s_fixup)
	UEX(usw, t2, 4(a0), s_fixup)
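/*
 * The two ulw/usw pairs above copy 8 bytes regardless of alignment;
 * advancing a1 by the amount computed in t0 then leaves the source
 * dword aligned, so the loops below can use plain lw loads.
 */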
	bnez	t8, u_do_end_words
	andi	t8, a1, 8			# now qword aligned?
	beqz	t8, u_oword_align
	EX(lw, t0, 0x00(a1), l_fixup)
	EX(lw, t1, 0x04(a1), l_fixup)
	UEX(usw, t0, 0x00(a0), s_fixup)
	UEX(usw, t1, 0x04(a0), s_fixup)
	beqz	t8, u_begin_movement
	EX(lw, t3, 0x08(a1), l_fixup)
	EX(lw, t4, 0x0c(a1), l_fixup)
	EX(lw, t0, 0x00(a1), l_fixup)
	EX(lw, t1, 0x04(a1), l_fixup)
	UEX(usw, t3, 0x08(a0), s_fixup)
	UEX(usw, t4, 0x0c(a0), s_fixup)
	UEX(usw, t0, 0x00(a0), s_fixup)
	UEX(usw, t1, 0x04(a0), s_fixup)
	UMOVE_BIGCHUNK(a1, a0, 0x00, t0, t1, t3, t4)
	UMOVE_BIGCHUNK(a1, a0, 0x20, t0, t1, t3, t4)
	UMOVE_BIGCHUNK(a1, a0, 0x40, t0, t1, t3, t4)
	UMOVE_BIGCHUNK(a1, a0, 0x60, t0, t1, t3, t4)
	bnez	t8, u_move_128bytes
	UMOVE_BIGCHUNK(a1, a0, 0x00, t0, t1, t3, t4)
	UMOVE_BIGCHUNK(a1, a0, 0x20, t0, t1, t3, t4)
	beqz	t2, u_do_end_words
	UMOVE_BIGCHUNK(a1, a0, 0x00, t0, t1, t3, t4)
	beqz	t8, u_maybe_end_cruft
	EX(lw, t0, 0x00(a1), l_fixup)
	UEX(usw, t0, 0x00(a0), s_fixup)
	EX(lb, t0, (a1), l_fixup)
	EX(sb, t0, (a0), s_fixup)
/* descending order, destination aligned */
#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
	lw	t0, (offset + 0x10)(src); \
	lw	t1, (offset + 0x14)(src); \
	lw	t2, (offset + 0x18)(src); \
	lw	t3, (offset + 0x1c)(src); \
	sw	t0, (offset + 0x10)(dst); \
	sw	t1, (offset + 0x14)(dst); \
	sw	t2, (offset + 0x18)(dst); \
	sw	t3, (offset + 0x1c)(dst); \
	lw	t0, (offset + 0x00)(src); \
	lw	t1, (offset + 0x04)(src); \
	lw	t2, (offset + 0x08)(src); \
	lw	t3, (offset + 0x0c)(src); \
	sw	t0, (offset + 0x00)(dst); \
	sw	t1, (offset + 0x04)(dst); \
	sw	t2, (offset + 0x08)(dst); \
	sw	t3, (offset + 0x0c)(dst)
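/*
 * The reverse movers mirror MOVE_BIGCHUNK but emit the offset + 0x10
 * wave before the offset + 0x00 wave, so the high half of each 32-byte
 * chunk is written before the low half, matching the descending
 * direction of __rmemcpy.  No EX/UEX wrappers: exceptions here are
 * kernel bugs, as the header comment explains.
 */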
/* descending order, destination unaligned */
#define RUMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
	lw	t0, (offset + 0x10)(src); \
	lw	t1, (offset + 0x14)(src); \
	lw	t2, (offset + 0x18)(src); \
	lw	t3, (offset + 0x1c)(src); \
	usw	t0, (offset + 0x10)(dst); \
	usw	t1, (offset + 0x14)(dst); \
	usw	t2, (offset + 0x18)(dst); \
	usw	t3, (offset + 0x1c)(dst); \
	lw	t0, (offset + 0x00)(src); \
	lw	t1, (offset + 0x04)(src); \
	lw	t2, (offset + 0x08)(src); \
	lw	t3, (offset + 0x0c)(src); \
	usw	t0, (offset + 0x00)(dst); \
	usw	t1, (offset + 0x04)(dst); \
	usw	t2, (offset + 0x08)(dst); \
	usw	t3, (offset + 0x0c)(dst)
	sltu	t0, a0, a1			# dst < src -> memcpy
	sltu	t0, v0, a1			# dst + len < src -> non-
	bnez	t0, __memcpy			# overlapping, can use memcpy
	move	v0, a0				/* return value */
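/*
 * Overlap policy in the two tests above: if dst < src, a forward copy
 * is safe even when the regions overlap, because every read stays
 * ahead of the corresponding write; if dst + len < src the regions
 * cannot overlap at all.  In every other case control falls into
 * __rmemcpy below, which biases both pointers by len and copies
 * backwards.
 */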
LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
	addu	a0, a2				# dst = dst + len
	addu	a1, a2				# src = src + len
#if 0 /* Horror fix */
	b	r_memcpy_u_src			# bad alignment
	bnez	t8, r_small_memcpy		# < 8 bytes to copy
	beqz	t8, r_word_align
	beqz	t8, r_dword_align
	bnez	t8, r_do_end_words
	beqz	t8, r_qword_align
	beqz	t8, r_oword_align
	beqz	t8, r_begin_movement
	lw	t3, 0x08(a1)			# assumes subblock ordering
	RMOVE_BIGCHUNK(a1, a0, -0x80, t0, t1, t3, t4)
	RMOVE_BIGCHUNK(a1, a0, -0x60, t0, t1, t3, t4)
	RMOVE_BIGCHUNK(a1, a0, -0x40, t0, t1, t3, t4)
	RMOVE_BIGCHUNK(a1, a0, -0x20, t0, t1, t3, t4)
	bnez	t8, r_move_128bytes
	RMOVE_BIGCHUNK(a1, a0, 0x20, t0, t1, t3, t4)
	RMOVE_BIGCHUNK(a1, a0, 0x00, t0, t1, t3, t4)
	beqz	t2, r_do_end_words
	RMOVE_BIGCHUNK(a1, a0, 0x00, t0, t1, t3, t4)
	beqz	t8, r_maybe_end_cruft
#endif /* Horror fix */
#if 0 /* Horror fix */
/* ------------------------------------------------------------------------- */

/* Bad, bad.  At least try to align the source */
	bnez	t8, r_small_memcpy		# < 8 bytes?
	andi	t0, a1, 7			# t0: how much to align
	ulw	t1, -8(a1)			# dword alignment
	bnez	t8, ru_do_end_words
	andi	t8, a1, 8			# now qword aligned?
	beqz	t8, ru_oword_align
	beqz	t8, ru_begin_movement
	lw	t3, 0x08(a1)			# assumes subblock ordering
	RUMOVE_BIGCHUNK(a1, a0, -0x80, t0, t1, t3, t4)
	RUMOVE_BIGCHUNK(a1, a0, -0x60, t0, t1, t3, t4)
	RUMOVE_BIGCHUNK(a1, a0, -0x40, t0, t1, t3, t4)
	RUMOVE_BIGCHUNK(a1, a0, -0x20, t0, t1, t3, t4)
	bnez	t8, ru_move_128bytes
	RUMOVE_BIGCHUNK(a1, a0, 0x20, t0, t1, t3, t4)
	RUMOVE_BIGCHUNK(a1, a0, 0x00, t0, t1, t3, t4)
	beqz	t2, ru_do_end_words
	RUMOVE_BIGCHUNK(a1, a0, 0x00, t0, t1, t3, t4)
	beqz	t8, ru_maybe_end_cruft
	bnez	t8, ru_end_words
	bnez	a2, ru_end_bytes
#endif /* Horror fix */
l_fixup:					# clear the rest of the buffer
	lw	t0, THREAD_BUADDR($28)		# t0: fault address
	subu	a2, AT, t0			# a2 bytes to go
	addu	a0, t0				# convert fault address to dst
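/*
 * Worked example of the fixup above (hypothetical numbers): for a
 * __copy_user of 128 bytes with $at = src + 128, a load faulting at
 * src + 40 yields a2 = $at - badvaddr = 88 bytes still to go, and
 * a0 = badvaddr + (dst - src) = dst + 40; since all accesses ascend,
 * nothing at or beyond dst + 40 has been written yet, so the buffer
 * can safely be cleared from that point on.
 */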