 * Copyright (C) 2013, 2014-2015, 2017, 2022 Synopsys, Inc. (www.synopsys.com)
 * Copyright (C) 2007 ARC International (UK) LTD
 *
 * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
#if defined(__ARC700__)
/* This memcpy implementation does not support objects of 1GB or larger -
   the check for alignment does not work then.  */
/* We assume that most sources and destinations are aligned, and
   that lengths are also mostly a multiple of four, although to a lesser
   extent.  */
	brls.d	r2,r3,.Lcopy_bytewise
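	;; A minimal reading, assuming the elided setup above put
	;; (dst | src) << 30 into r3: if either pointer is unaligned, r3 is
	;; at least 1GB and the bytewise path is taken for any len below
	;; that; if both are word-aligned, r3 is 0 and only len == 0
	;; branches.  This is where the 1GB limit noted above comes from.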
#ifdef __LITTLE_ENDIAN__
#else /* BIG ENDIAN */
#elif defined(__ARCHS__)
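;; The SHIFT_*/MERGE_*/EXTRACT_* macros below hide the byte-lane
;; arithmetic used by the unaligned-copy loops: MERGE_* positions the
;; priming bytes read before the main loop, SHIFT_* splices each aligned
;; word load with the bytes carried over in r5, and EXTRACT_* recovers
;; the tail bytes afterwards.  The shift directions swap with endianness
;; so the same loop bodies work in both configurations.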
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif
#if defined(__LL64__) || defined(__ARC_LL64__)
# define PREFETCH_READ(RX)	prefetch	[RX, 56]
# define PREFETCH_WRITE(RX)	prefetchw	[RX, 64]
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define PREFETCH_READ(RX)	prefetch	[RX, 28]
# define PREFETCH_WRITE(RX)	prefetchw	[RX, 32]
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif
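;; NOTE: the ZOLSHFT/ZOLAND values above are restored by inference: the
;; main loop issues four LOADX/STOREX pairs per iteration, i.e. 32 bytes
;; with 64-bit ldd/std or 16 bytes with 32-bit ld/st, so len >> ZOLSHFT
;; counts whole chunks and len & ZOLAND is the tail.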
	prefetch [r1]		; Prefetch the read location
	prefetchw [r0]		; Prefetch the write location
	mov	r3, r0		; don't clobber ret val
	lpnz	@.Laligndestination
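	;; lpnz opens an ARC zero-overhead loop: the body up to the
	;; .Laligndestination label repeats LP_COUNT times (set in the elided
	;; lines above, presumably to the number of bytes needed to word-align
	;; the destination) without a branch instruction, and is skipped
	;; entirely if the preceding flag-setting instruction produced zero.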
;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@.Lsourceunaligned
;;; CASE 0: Both source and destination are 32-bit aligned
;;; Convert len to Dwords, unroll x4
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@.Lcopy32_64bytes
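	;; (Loop body elided: each iteration is assumed to issue four
	;; LOADX/STOREX pairs plus one PREFETCH_READ/PREFETCH_WRITE, moving
	;; one ZOL chunk of 32 or 16 bytes.)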
	and.f	lp_count, r2, ZOLAND	; the remaining (len & ZOLAND) tail bytes
	lpnz	@.Lcopyremainingbytes
.Lcopyremainingbytes:
	beq.d	@.LunalignedOffby2
	bhi.d	@.LunalignedOffby3
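	;; Dispatch on the source misalignment in r4; a cmp r4, 2 is assumed
	;; in the elided lines: equal goes to .LunalignedOffby2, higher to
	;; .LunalignedOffby3, and fall-through handles off-by-1.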
;;; CASE 1: The source is unaligned, off by 1
	;; Read 1 byte to reach 16-bit alignment,
	;; then 2 bytes to reach 32-bit alignment
	;; Convert to words, unroll x2
	lsr.f	lp_count, r2, 3
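	;; len >> 3 iterations: each pass of the word loop below copies two
	;; 32-bit words, i.e. 8 bytes.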
	;; Both src and dst are aligned
	prefetch [r1, 28]	;Prefetch the next read location
	prefetchw [r3, 32]	;Prefetch the next write location
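	;; (Elided body: SHIFT_1/SHIFT_2 are assumed to combine each newly
	;; loaded word with the bytes carried over in r5, so every st.ab
	;; writes a fully formed aligned word.)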
	;; Write back the remaining 16 bits
	EXTRACT_1 (r6, r5, 16)
	;; Write back the remaining 8 bits
	EXTRACT_2 (r5, r5, 16)
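	;; The off-by-1 prologue consumed 3 source bytes (1 + 2) before the
	;; loop, so 3 bytes of the final word still sit in r5; they are
	;; flushed as a halfword (EXTRACT_1) plus a byte (EXTRACT_2).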
	and.f	lp_count, r2, 0x07	; the last (len & 7) bytes
	lpnz	@.Lcopybytewise_1
;;; CASE 2: The source is unaligned, off by 2
	;; Both src and dst are aligned
	;; Convert to words, unroll x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
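	;; (Elided: on big-endian the priming halfword is presumably
	;; pre-shifted into the upper byte lanes so the merges in the loop
	;; line up.)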
	prefetch [r1, 28]	;Prefetch the next read location
	prefetchw [r3, 32]	;Prefetch the next write location
#ifdef __BIG_ENDIAN__
	and.f	lp_count, r2, 0x07	; the last (len & 7) bytes
	lpnz	@.Lcopybytewise_2
;;; CASE 3: The source is unaligned, off by 3
;;; Hence, read 1 byte to achieve 32-bit alignment
	;; Both src and dst are aligned
	;; Convert to words, unroll x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	prefetch [r1, 28]	;Prefetch the next read location
	prefetchw [r3, 32]	;Prefetch the next write location
#ifdef __BIG_ENDIAN__
	and.f	lp_count, r2, 0x07	; the last (len & 7) bytes
	lpnz	@.Lcopybytewise_3
#elif defined(__ARC64_ARCH32__)
;; Based on Synopsys code from newlib's arc64/memcpy.S
	lsr.f	r11, r2, 4	; counter for 16-byte chunks
	beq.d	@.L_write_15_bytes
	mov	r3, r0		; work on a copy of "r0"
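	;; lsr.f sets the Z flag, so beq.d branches to the tail when
	;; len < 16; the mov in its delay slot executes either way, leaving
	;; r0 untouched as memcpy's return value.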
#if defined(__ARC64_LL64__)
	dbnz	r11, @.L_write_16_bytes
	dbnz.d	r11, @.L_write_16_bytes
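	;; dbnz decrements r11 and branches while the result is non-zero; the
	;; .d form exposes a delay slot, which the non-LL64 path presumably
	;; fills with the final st.ab of the unrolled group so the branch
	;; costs no extra cycle.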
#error "Unsupported ARC CPU type"
libc_hidden_def(memcpy)