1 /* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
2 /* Modified by SuperH, Inc. September 2003 */
6 ! by Toshiyasu Morita (tm@netcom.com)
7 ! hacked by J"orn Rennecke (joern.rennecke@superh.com) ("o for o-umlaut)
8 ! SH5 code Copyright 2002 SuperH Ltd.
10 ! Entry: ARG0: destination pointer
11 ! ARG1: source pointer
14 ! Exit: RESULT: destination pointer
15 ! any other registers in the range r0-r7: trashed
17 ! Notes: Usually one wants to do small reads and write a longword, but
18 ! unfortunately it is difficult in some cases to concatenate bytes
19 ! into a longword on the SH, so this does a longword read and small
22 ! This implementation makes two assumptions about how it is called:
24 ! 1.: If the byte count is nonzero, the address of the last byte to be
25 ! copied is unsigned greater than the address of the first byte to
26 ! be copied. This could be easily swapped for a signed comparison,
27 ! but the algorithm used needs some comparison.
29 ! 2.: When there are two or three bytes in the last word of an 11-or-more
30 ! bytes memory chunk to be copied, the rest of the word can be read
31 ! without side effects.
32 ! This could be easily changed by increasing the minimum size of
33 ! a fast memcpy and the amount subtracted from r7 before L_2l_loop by 2,
34 ! however, this would cost a few extra cycles on average.
35 ! For SHmedia, the assumption is that any quadword can be read in its
36 ! entirety if at least one byte is included in the copy.
39 .section .text..SHmedia32,"ax"
41 .type memcpy, @function
/* Helper macros that each expand to a lo/hi instruction pair.
     LDUAQ / STUAQ : ldlo.q + ldhi.q / stlo.q + sthi.q  (.q forms)
     LDUAL / STUAL : ldlo.l + ldhi.l / stlo.l + sthi.l  (.l forms)
   Parameters:
     P  - base pointer register
     O  - byte offset passed to the "lo" instruction; the "hi" instruction
          is given O+7 (.q) or O+3 (.l), i.e. presumably the offset of the
          last byte of an 8- or 4-byte datum starting at O
     D0 - register operand of the "lo" instruction
     D1 - register operand of the "hi" instruction
   NOTE(review): the names suggest "load/store unaligned quad/long"; for
   the load forms D0 and D1 presumably each receive part of the datum and
   must be combined by the caller -- confirm against the SH-5 ldlo/ldhi
   instruction descriptions. */
46 #define LDUAQ(P,O,D0,D1) ldlo.q P,O,D0; ldhi.q P,O+7,D1
47 #define STUAQ(P,O,D0,D1) stlo.q P,O,D0; sthi.q P,O+7,D1
48 #define LDUAL(P,O,D0,D1) ldlo.l P,O,D0; ldhi.l P,O+3,D1
49 #define STUAL(P,O,D0,D1) stlo.l P,O,D0; sthi.l P,O+3,D1
57 movi (L1-L0+63*32 + 1) & 0xffff,r1
65 /* Rearranged to make cut2 safe */
67 L4_7: /* 4..7 byte memcpy cntd. */
75 L1: /* 0 byte memcpy */
83 L2_3: /* 2 or 3 byte memcpy cntd. */
92 L8_15: /* 8..15 byte memcpy cntd. */
99 /* 2 or 3 byte memcpy */
109 /* 4 .. 7 byte memcpy */
110 LDUAL (r3, 0, r0, r1)
118 /* 8 .. 15 byte memcpy */
119 LDUAQ (r3, 0, r0, r1)
127 /* 16 .. 24 byte memcpy */
128 LDUAQ (r3, 0, r0, r1)
129 LDUAQ (r3, 8, r8, r9)
152 movi 64+8, r27 // could subtract r7 from that.
201 .size memcpy,.-memcpy