2 * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com)
3 * Copyright (C) 2007 ARC International (UK) LTD
5 * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
10 #if !defined(__ARC700__) && !defined(__ARCHS__)
11 #error "Neither ARC700 nor ARCHS is defined!"
17 /* This memcpy implementation does not support objects of 1GB or larger -
18 the check for alignment does not work then. */
19 /* We assume that most sources and destinations are aligned, and
20 that also lengths are mostly a multiple of four, although to a lesser
25 brls.d r2,r3,.Lcopy_bytewise
42 #ifdef __LITTLE_ENDIAN__
48 #else /* BIG ENDIAN */
76 #endif /* __ARC700__ */
/* Endian-specific shift/mask helpers; each expands to at most one
   instruction.  SHIFT_1 and SHIFT_2 swap direction (asl vs. lsr) between
   the two sets of definitions below.  Note that MERGE_2 is empty in the
   first (__LITTLE_ENDIAN__) set, and that EXTRACT_1 in the first set and
   EXTRACT_2 in the second set ignore their IMM argument (fixed 0xFFFF
   mask and fixed shift of 0x08 respectively).  */
79 #ifdef __LITTLE_ENDIAN__
80 # define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; <<
81 # define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >>
82 # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM
83 # define MERGE_2(RX,RY,IMM)
84 # define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF
85 # define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM
87 # define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >>
88 # define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; <<
89 # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; <<
90 # define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; <<
91 # define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM
92 # define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08
/* Copy-unit selection.  When __LL64__ or __ARC_LL64__ is defined,
   LOADX/STOREX use ldd.ab/std.ab with a step of 8 and the prefetch
   helpers use offsets 56 (read) and 64 (write); the alternative
   definitions use ld.ab/st.ab with a step of 4 and offsets 28 and 32.  */
95 #if defined(__LL64__) || defined(__ARC_LL64__)
96 # define PREFETCH_READ(RX) prefetch [RX, 56]
97 # define PREFETCH_WRITE(RX) prefetchw [RX, 64]
98 # define LOADX(DST,RX) ldd.ab DST, [RX, 8]
99 # define STOREX(SRC,RX) std.ab SRC, [RX, 8]
103 # define PREFETCH_READ(RX) prefetch [RX, 28]
104 # define PREFETCH_WRITE(RX) prefetchw [RX, 32]
105 # define LOADX(DST,RX) ld.ab DST, [RX, 4]
106 # define STOREX(SRC,RX) st.ab SRC, [RX, 4]
;;; r1 is the read pointer and r0 the write pointer.  r0 is also the
;;; return value, so r3 is used as the working write pointer from here on.
111 prefetch [r1] ; Prefetch the read location
112 prefetchw [r0] ; Prefetch the write location
116 mov r3, r0 ; don't clobber ret val
;;; NOTE(review): lpnz presumably sets up a conditional hardware loop whose
;;; body ends at the given label -- confirm against the ARC ISA manual.
125 lpnz @.Laligndestination
132 ;;; Check the alignment of the source
134 bnz.d @.Lsourceunaligned
136 ;;; CASE 0: Both source and destination are 32bit aligned
137 ;;; Convert len to Dwords, unfold x4
;;; Main loop count is r2 >> ZOLSHFT; the tail below covers r2 & ZOLAND.
138 lsr.f lp_count, r2, ZOLSHFT
139 lpnz @.Lcopy32_64bytes
153 and.f lp_count, r2, ZOLAND ;Remaining tail bytes (r2 & ZOLAND)
155 lpnz @.Lcopyremainingbytes
159 .Lcopyremainingbytes:
;;; Dispatch on the source misalignment: the "equal" outcome goes to the
;;; off-by-2 path and the "higher" outcome to the off-by-3 path; otherwise
;;; CASE 1 (off by 1) follows.
166 beq.d @.LunalignedOffby2
169 bhi.d @.LunalignedOffby3
172 ;;; CASE 1: The source is unaligned, off by 1
173 ;; Hence, read 1 byte to reach 16bit alignment
174 ;; and then 2 bytes to reach 32bit alignment
177 ;; Convert to words, unfold x2
;; lp_count = r2 >> 3, i.e. two 32bit words (8 bytes) per iteration
178 lsr.f lp_count, r2, 3
183 ;; Both src and dst are aligned
187 prefetch [r1, 28] ;Prefetch the next read location
189 prefetchw [r3, 32] ;Prefetch the next write location
203 ;; Write back the remaining 16bits
204 EXTRACT_1 (r6, r5, 16)
206 ;; Write back the remaining 8bits
207 EXTRACT_2 (r5, r5, 16)
210 and.f lp_count, r2, 0x07 ;Remaining 0-7 bytes (r2 & 0x07)
211 lpnz @.Lcopybytewise_1
219 ;;; CASE 2: The source is unaligned, off by 2
223 ;; Both src and dst are aligned
224 ;; Convert to words, unfold x2
;; lp_count = r2 >> 3, i.e. two 32bit words (8 bytes) per iteration
225 lsr.f lp_count, r2, 3
226 #ifdef __BIG_ENDIAN__
232 prefetch [r1, 28] ;Prefetch the next read location
234 prefetchw [r3, 32] ;Prefetch the next write location
248 #ifdef __BIG_ENDIAN__
253 and.f lp_count, r2, 0x07 ;Remaining 0-7 bytes (r2 & 0x07)
254 lpnz @.Lcopybytewise_2
262 ;;; CASE 3: The source is unaligned, off by 3
263 ;;; Hence, 1 byte has to be read to achieve 32bit alignment
265 ;; Both src and dst are aligned
266 ;; Convert to words, unfold x2
;; lp_count = r2 >> 3, i.e. two 32bit words (8 bytes) per iteration
267 lsr.f lp_count, r2, 3
268 #ifdef __BIG_ENDIAN__
274 prefetch [r1, 28] ;Prefetch the next read location
276 prefetchw [r3, 32] ;Prefetch the next write location
290 #ifdef __BIG_ENDIAN__
295 and.f lp_count, r2, 0x07 ;Remaining 0-7 bytes (r2 & 0x07)
296 lpnz @.Lcopybytewise_3
302 #endif /* __ARCHS__ */
305 libc_hidden_def(memcpy)