/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 1999, 2000 Ralf Baechle
 * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.  In order to keep the exception fixup routine
 * simple, all memory accesses in __copy_user to src and dst are strictly
 * incremental.  The fixup routine depends on $at not being changed.
 */
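/*
 * A rough C-level view of the __copy_user contract (a sketch, not part of
 * this file): like the generic Linux uaccess helpers, the backend returns
 * the number of bytes that could NOT be copied, 0 on success.
 *
 *      size_t left = __copy_user(to, from, n);
 *      if (left)                       // faulted after n - left bytes
 *              return -EFAULT;         // typical caller reaction
 */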
#include <asm/asm.h>
#include <asm/offset.h>
#include <asm/regdef.h>
/*
 * The fixup routine for copy_to_user depends on copying strictly in
 * increasing order.  Gas expands the ulw/usw macros in the wrong order for
 * little endian machines, so we cannot depend on them.
 */
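/*
 * Instead we open-code the L/U halves that the UEX/UEXD macros below paste
 * together via ## (reconstructed here, following the usual swl/swr and
 * sdl/sdr pairing for each endianness, since UEX/UEXD rely on them).
 */
#ifdef __MIPSEB__
#define uswL    swl
#define uswU    swr
#define ulwL    lwl
#define ulwU    lwr
#define usdL    sdl
#define usdU    sdr
#define uldL    ldl
#define uldU    ldr
#endif

#ifdef __MIPSEL__
#define uswL    swr
#define uswU    swl
#define ulwL    lwr
#define ulwU    lwl
#define usdL    sdr
#define usdU    sdl
#define uldL    ldr
#define uldU    ldl
#endif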
#define EX(insn,reg,addr,handler) \
9:      insn    reg, addr; \
        .section __ex_table,"a"; \
        PTR     9b, handler; \
        .previous

#define UEX(insn,reg,addr,handler) \
9:      insn ## L reg, addr; \
10:     insn ## U reg, 3 + addr; \
        .section __ex_table,"a"; \
        PTR     9b, handler; \
        PTR     10b, handler; \
        .previous

#define UEXD(insn,reg,addr,handler) \
9:      insn ## L reg, addr; \
10:     insn ## U reg, 7 + addr; \
        .section __ex_table,"a"; \
        PTR     9b, handler; \
        PTR     10b, handler; \
        .previous
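/*
 * For reference, EX(lw, t0, 0(a1), l_fixup) expands to:
 *
 * 9:   lw      t0, 0(a1)
 *      .section __ex_table,"a"
 *      PTR     9b, l_fixup
 *      .previous
 *
 * i.e. every potentially faulting access gets an exception-table entry
 * pointing at a fixup routine; since all accesses are strictly ascending,
 * the fixup can compute from the fault address how much work was left.
 */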
/* ascending order, destination aligned */
#define MOVE_BIGGERCHUNK(src, dst, offset, t0, t1, t2, t3) \
        EX(ld, t0, (offset + 0x00)(src), l_fixup); \
        EX(ld, t1, (offset + 0x08)(src), l_fixup); \
        EX(ld, t2, (offset + 0x10)(src), l_fixup); \
        EX(ld, t3, (offset + 0x18)(src), l_fixup); \
        EX(sd, t0, (offset + 0x00)(dst), s_fixup); \
        EX(sd, t1, (offset + 0x08)(dst), s_fixup); \
        EX(sd, t2, (offset + 0x10)(dst), s_fixup); \
        EX(sd, t3, (offset + 0x18)(dst), s_fixup); \
        EX(ld, t0, (offset + 0x20)(src), l_fixup); \
        EX(ld, t1, (offset + 0x28)(src), l_fixup); \
        EX(ld, t2, (offset + 0x30)(src), l_fixup); \
        EX(ld, t3, (offset + 0x38)(src), l_fixup); \
        EX(sd, t0, (offset + 0x20)(dst), s_fixup); \
        EX(sd, t1, (offset + 0x28)(dst), s_fixup); \
        EX(sd, t2, (offset + 0x30)(dst), s_fixup); \
        EX(sd, t3, (offset + 0x38)(dst), s_fixup)
/* ascending order, destination aligned */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
        EX(lw, t0, (offset + 0x00)(src), l_fixup); \
        EX(lw, t1, (offset + 0x04)(src), l_fixup); \
        EX(lw, t2, (offset + 0x08)(src), l_fixup); \
        EX(lw, t3, (offset + 0x0c)(src), l_fixup); \
        EX(sw, t0, (offset + 0x00)(dst), s_fixup); \
        EX(sw, t1, (offset + 0x04)(dst), s_fixup); \
        EX(sw, t2, (offset + 0x08)(dst), s_fixup); \
        EX(sw, t3, (offset + 0x0c)(dst), s_fixup); \
        EX(lw, t0, (offset + 0x10)(src), l_fixup); \
        EX(lw, t1, (offset + 0x14)(src), l_fixup); \
        EX(lw, t2, (offset + 0x18)(src), l_fixup); \
        EX(lw, t3, (offset + 0x1c)(src), l_fixup); \
        EX(sw, t0, (offset + 0x10)(dst), s_fixup); \
        EX(sw, t1, (offset + 0x14)(dst), s_fixup); \
        EX(sw, t2, (offset + 0x18)(dst), s_fixup); \
        EX(sw, t3, (offset + 0x1c)(dst), s_fixup)
/* ascending order, destination unaligned */
#define UMOVE_BIGGERCHUNK(src, dst, offset, t0, t1, t2, t3) \
        EX(ld, t0, (offset + 0x00)(src), l_fixup); \
        EX(ld, t1, (offset + 0x08)(src), l_fixup); \
        EX(ld, t2, (offset + 0x10)(src), l_fixup); \
        EX(ld, t3, (offset + 0x18)(src), l_fixup); \
        UEXD(usd, t0, (offset + 0x00)(dst), s_fixup); \
        UEXD(usd, t1, (offset + 0x08)(dst), s_fixup); \
        UEXD(usd, t2, (offset + 0x10)(dst), s_fixup); \
        UEXD(usd, t3, (offset + 0x18)(dst), s_fixup); \
        EX(ld, t0, (offset + 0x20)(src), l_fixup); \
        EX(ld, t1, (offset + 0x28)(src), l_fixup); \
        EX(ld, t2, (offset + 0x30)(src), l_fixup); \
        EX(ld, t3, (offset + 0x38)(src), l_fixup); \
        UEXD(usd, t0, (offset + 0x20)(dst), s_fixup); \
        UEXD(usd, t1, (offset + 0x28)(dst), s_fixup); \
        UEXD(usd, t2, (offset + 0x30)(dst), s_fixup); \
        UEXD(usd, t3, (offset + 0x38)(dst), s_fixup)
/* ascending order, destination unaligned */
#define UMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
        EX(lw, t0, (offset + 0x00)(src), l_fixup); \
        EX(lw, t1, (offset + 0x04)(src), l_fixup); \
        EX(lw, t2, (offset + 0x08)(src), l_fixup); \
        EX(lw, t3, (offset + 0x0c)(src), l_fixup); \
        UEX(usw, t0, (offset + 0x00)(dst), s_fixup); \
        UEX(usw, t1, (offset + 0x04)(dst), s_fixup); \
        UEX(usw, t2, (offset + 0x08)(dst), s_fixup); \
        UEX(usw, t3, (offset + 0x0c)(dst), s_fixup); \
        EX(lw, t0, (offset + 0x10)(src), l_fixup); \
        EX(lw, t1, (offset + 0x14)(src), l_fixup); \
        EX(lw, t2, (offset + 0x18)(src), l_fixup); \
        EX(lw, t3, (offset + 0x1c)(src), l_fixup); \
        UEX(usw, t0, (offset + 0x10)(dst), s_fixup); \
        UEX(usw, t1, (offset + 0x14)(dst), s_fixup); \
        UEX(usw, t2, (offset + 0x18)(dst), s_fixup); \
        UEX(usw, t3, (offset + 0x1c)(dst), s_fixup)
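/*
 * Note the pattern shared by all four macros: four loads are issued before
 * the four matching stores, which keeps each load and its use far enough
 * apart to avoid load-use stalls while leaving the access order strictly
 * ascending for the fixup code.
 */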
LEAF(memcpy)                                    /* a0=dst a1=src a2=len */
        move    v0, a0                          /* return value */
__memcpy:

        b       memcpy_u_src                    # bad alignment

        bnez    t8, small_memcpy                # < 8 bytes to copy

        EX(lb, ta0, (a1), l_fixup)

        EX(sb, ta0, (a0), s_fixup)

        EX(lh, ta0, (a1), l_fixup)

        EX(sh, ta0, (a0), s_fixup)

        bnez    t8, do_end_words

        EX(lw, ta0, 0x00(a1), l_fixup)

        EX(sw, ta0, 0x00(a0), s_fixup)

        EX(lw, ta0, 0x00(a1), l_fixup)
        EX(lw, ta1, 0x04(a1), l_fixup)

        EX(sw, ta0, 0x00(a0), s_fixup)
        EX(sw, ta1, 0x04(a0), s_fixup)

        beqz    t8, begin_movement

        EX(lw, ta3, 0x00(a1), l_fixup)
        EX(lw, t0, 0x04(a1), l_fixup)
        EX(lw, ta0, 0x08(a1), l_fixup)
        EX(lw, ta1, 0x0c(a1), l_fixup)
        EX(sw, ta3, 0x00(a0), s_fixup)
        EX(sw, t0, 0x04(a0), s_fixup)
        EX(sw, ta0, 0x08(a0), s_fixup)
        EX(sw, ta1, 0x0c(a0), s_fixup)

        MOVE_BIGGERCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)
        MOVE_BIGGERCHUNK(a1, a0, 0x40, ta0, ta1, ta3, t0)

        bnez    t8, move_128bytes

        MOVE_BIGGERCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)

        beqz    ta2, do_end_words

        MOVE_BIGCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)

        beqz    t8, maybe_end_cruft

        EX(lw, ta0, (a1), l_fixup)

        EX(sw, ta0, (a0), s_fixup)

        EX(lb, ta0, (a1), l_fixup)

        EX(sb, ta0, (a0), s_fixup)
/* ------------------------------------------------------------------------- */

/* Bad, bad.  At least try to align the source */

memcpy_u_src:
        bnez    t8, small_memcpy        # < 8 bytes?

        daddiu  ta0, a1, 7              # ta0: how much to align

        UEXD(uld, ta1, 0(a1), l_fixup)  # dword alignment
        UEXD(usd, ta1, 0(a0), s_fixup)
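/*
 * Roughly, in C (a sketch; load/store_unaligned_8 are hypothetical
 * stand-ins for the uld/usd pair above):
 *
 *      size_t fixup = (-(uintptr_t)src) & 7;   // bytes until src aligns
 *      store_unaligned_8(dst, load_unaligned_8(src));
 *      src += fixup; dst += fixup; len -= fixup;
 *
 * One full unaligned doubleword is copied, then both pointers advance by
 * just the alignment fixup; the overlapping tail bytes of that doubleword
 * are simply rewritten by the aligned copy that follows.
 */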
        bnez    t8, u_do_end_words

        andi    t8, a1, 8               # now qword aligned?

        beqz    t8, u_oword_align

        EX(ld, ta0, 0x00(a1), l_fixup)

        UEXD(usd, ta0, 0x00(a0), s_fixup)

        beqz    t8, u_begin_movement

        EX(lw, ta3, 0x08(a1), l_fixup)
        EX(lw, t0, 0x0c(a1), l_fixup)
        EX(lw, ta0, 0x00(a1), l_fixup)
        EX(lw, ta1, 0x04(a1), l_fixup)
        UEX(usw, ta3, 0x08(a0), s_fixup)
        UEX(usw, t0, 0x0c(a0), s_fixup)
        UEX(usw, ta0, 0x00(a0), s_fixup)
        UEX(usw, ta1, 0x04(a0), s_fixup)

        UMOVE_BIGGERCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)
        UMOVE_BIGGERCHUNK(a1, a0, 0x40, ta0, ta1, ta3, t0)

        bnez    t8, u_move_128bytes

        UMOVE_BIGGERCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)

        beqz    ta2, u_do_end_words

        UMOVE_BIGCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)

        beqz    t8, u_maybe_end_cruft

        EX(lw, ta0, 0x00(a1), l_fixup)

        UEX(usw, ta0, 0x00(a0), s_fixup)

        EX(lb, ta0, (a1), l_fixup)

        EX(sb, ta0, (a0), s_fixup)
/* descending order, destination aligned */
#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
        lw      t0, (offset + 0x10)(src); \
        lw      t1, (offset + 0x14)(src); \
        lw      t2, (offset + 0x18)(src); \
        lw      t3, (offset + 0x1c)(src); \
        sw      t0, (offset + 0x10)(dst); \
        sw      t1, (offset + 0x14)(dst); \
        sw      t2, (offset + 0x18)(dst); \
        sw      t3, (offset + 0x1c)(dst); \
        lw      t0, (offset + 0x00)(src); \
        lw      t1, (offset + 0x04)(src); \
        lw      t2, (offset + 0x08)(src); \
        lw      t3, (offset + 0x0c)(src); \
        sw      t0, (offset + 0x00)(dst); \
        sw      t1, (offset + 0x04)(dst); \
        sw      t2, (offset + 0x08)(dst); \
        sw      t3, (offset + 0x0c)(dst)
/* descending order, destination unaligned */
#define RUMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
        lw      t0, (offset + 0x10)(src); \
        lw      t1, (offset + 0x14)(src); \
        lw      t2, (offset + 0x18)(src); \
        lw      t3, (offset + 0x1c)(src); \
        usw     t0, (offset + 0x10)(dst); \
        usw     t1, (offset + 0x14)(dst); \
        usw     t2, (offset + 0x18)(dst); \
        usw     t3, (offset + 0x1c)(dst); \
        lw      t0, (offset + 0x00)(src); \
        lw      t1, (offset + 0x04)(src); \
        lw      t2, (offset + 0x08)(src); \
        lw      t3, (offset + 0x0c)(src); \
        usw     t0, (offset + 0x00)(dst); \
        usw     t1, (offset + 0x04)(dst); \
        usw     t2, (offset + 0x08)(dst); \
        usw     t3, (offset + 0x0c)(dst)
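/*
 * Unlike the ascending macros above, these copy the high half of each
 * 32-byte block before the low half, so the blocks can be walked from the
 * end of the buffer downwards by __rmemcpy.
 */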
LEAF(memmove)
        sltu    ta0, a0, a1                     # dst < src -> memcpy
        bnez    ta0, memcpy
         daddu  v0, a0, a2
        sltu    ta0, v0, a1                     # dst + len < src -> non-
        bnez    ta0, __memcpy                   # overlapping, can use memcpy
         move   v0, a0                          /* return value */
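/*
 * The dispatch above, transcribed into rough C (a sketch; the asm keeps
 * the return value in v0 and branches rather than calls):
 *
 *      void *memmove(void *dst, const void *src, size_t len)
 *      {
 *              if (dst < src)                          // ascending copy safe
 *                      return memcpy(dst, src, len);
 *              if ((char *)dst + len < (char *)src)    // non-overlapping
 *                      return memcpy(dst, src, len);
 *              return __rmemcpy(dst, src, len);        // copy descending
 *      }
 */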
LEAF(__rmemcpy)                                 /* a0=dst a1=src a2=len */
        daddu   a0, a2                          # dst = dst + len
        daddu   a1, a2                          # src = src + len
#if 0 /* Horror fix */

        beqz    ta0, r_can_align

        b       r_memcpy_u_src          # bad alignment

        bnez    t8, r_small_memcpy      # < 8 bytes to copy

        beqz    t8, r_word_align

        beqz    t8, r_dword_align

        bnez    t8, r_do_end_words

        beqz    t8, r_qword_align

        beqz    t8, r_oword_align

        beqz    t8, r_begin_movement

        lw      ta3, 0x08(a1)           # assumes subblock ordering

        RMOVE_BIGCHUNK(a1, a0, -0x80, ta0, ta1, ta3, t0)
        RMOVE_BIGCHUNK(a1, a0, -0x60, ta0, ta1, ta3, t0)
        RMOVE_BIGCHUNK(a1, a0, -0x40, ta0, ta1, ta3, t0)
        RMOVE_BIGCHUNK(a1, a0, -0x20, ta0, ta1, ta3, t0)

        bnez    t8, r_move_128bytes

        RMOVE_BIGCHUNK(a1, a0, 0x20, ta0, ta1, ta3, t0)
        RMOVE_BIGCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)

        beqz    ta2, r_do_end_words

        RMOVE_BIGCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)

        beqz    t8, r_maybe_end_cruft

#endif /* Horror fix */
#if 0 /* Horror fix */
/* ------------------------------------------------------------------------- */

/* Bad, bad.  At least try to align the source */

r_memcpy_u_src:
        bnez    t8, r_small_memcpy      # < 8 bytes?

        andi    ta0, a1, 7              # ta0: how much to align

        ulw     ta1, -8(a1)             # dword alignment

        bnez    t8, ru_do_end_words

        andi    t8, a1, 8               # now qword aligned?

        beqz    t8, ru_oword_align

        beqz    t8, ru_begin_movement

        lw      ta3, 0x08(a1)           # assumes subblock ordering

        RUMOVE_BIGCHUNK(a1, a0, -0x80, ta0, ta1, ta3, t0)
        RUMOVE_BIGCHUNK(a1, a0, -0x60, ta0, ta1, ta3, t0)
        RUMOVE_BIGCHUNK(a1, a0, -0x40, ta0, ta1, ta3, t0)
        RUMOVE_BIGCHUNK(a1, a0, -0x20, ta0, ta1, ta3, t0)

        bnez    t8, ru_move_128bytes

        RUMOVE_BIGCHUNK(a1, a0, 0x20, ta0, ta1, ta3, t0)
        RUMOVE_BIGCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)

        beqz    ta2, ru_do_end_words

        RUMOVE_BIGCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)

        beqz    t8, ru_maybe_end_cruft

        bnez    t8, ru_end_words

        bnez    a2, ru_end_bytes

#endif /* Horror fix */
l_fixup:                                        # clear the rest of the buffer
        ld      ta0, THREAD_BUADDR($28)

        dsubu   a2, AT, ta0                     # a2 bytes to go
        daddu   a0, ta0                         # compute start address in a0
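/*
 * What l_fixup computes, in rough C (a sketch; per the header comment we
 * assume $at still holds the source end address and that THREAD_BUADDR is
 * the faulting address saved by the exception handler):
 *
 *      size_t left = src_end - fault_addr;     // a2: bytes to go
 *      char *dstp  = dst + (fault_addr - src); // a0: first byte not stored
 *      memset(dstp, 0, left);                  // clear rest of the buffer
 *      return left;                            // bytes not copied
 */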