Move all files into ports/ subdirectory in preparation for merge with glibc
[glibc.git] / ports / sysdeps / tile / tilegx / memcpy.c
blobdd6e30dd60783915721466a6a6aafe444f8cd555
1 /* Copyright (C) 2011-2012 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
17 <http://www.gnu.org/licenses/>. */
19 #include <string.h>
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <arch/chip.h>
24 /* Word type used for the 8-byte-at-a-time copy loops in this file.
      Must be 8 bytes in size. */
25 #define word_t uint64_t
27 /* How many cache lines ahead of the current source position should we
      prefetch?  Used as the iteration count of the initial prefetch loop. */
28 #define PREFETCH_LINES_AHEAD 3
30 void *
31 __memcpy (void *__restrict dstv, const void *__restrict srcv, size_t n)
33 char *__restrict dst1 = (char *) dstv;
34 const char *__restrict src1 = (const char *) srcv;
35 const char *__restrict src1_end;
36 const char *__restrict prefetch;
37 word_t *__restrict dst8; /* 8-byte pointer to destination memory. */
38 word_t final; /* Final bytes to write to trailing word, if any */
39 long i;
41 if (n < 16)
43 for (; n; n--)
44 *dst1++ = *src1++;
45 return dstv;
48 /* Locate the end of source memory we will copy. Don't prefetch
49 past this. */
50 src1_end = src1 + n - 1;
52 /* Prefetch ahead a few cache lines, but not past the end. */
53 prefetch = src1;
54 for (i = 0; i < PREFETCH_LINES_AHEAD; i++)
56 __insn_prefetch (prefetch);
57 prefetch += CHIP_L2_LINE_SIZE ();
58 prefetch = (prefetch > src1_end) ? prefetch : src1;
61 /* Copy bytes until dst is word-aligned. */
62 for (; (uintptr_t) dst1 & (sizeof (word_t) - 1); n--)
63 *dst1++ = *src1++;
65 /* 8-byte pointer to destination memory. */
66 dst8 = (word_t *) dst1;
68 if (__builtin_expect ((uintptr_t) src1 & (sizeof (word_t) - 1), 0))
70 /* Misaligned copy. Copy 8 bytes at a time, but don't bother
71 with other fanciness.
72 TODO: Consider prefetching and using wh64 as well. */
74 /* Create an aligned src8. */
75 const word_t *__restrict src8 =
76 (const word_t *) ((uintptr_t) src1 & -sizeof (word_t));
77 word_t b;
79 word_t a = *src8++;
80 for (; n >= sizeof (word_t); n -= sizeof (word_t))
82 b = *src8++;
83 a = __insn_dblalign (a, b, src1);
84 *dst8++ = a;
85 a = b;
88 if (n == 0)
89 return dstv;
91 b = ((const char *) src8 <= src1_end) ? *src8 : 0;
93 /* Final source bytes to write to trailing partial word, if any. */
94 final = __insn_dblalign (a, b, src1);
96 else
98 /* Aligned copy. */
100 const word_t *__restrict src8 = (const word_t *) src1;
102 /* src8 and dst8 are both word-aligned. */
103 if (n >= CHIP_L2_LINE_SIZE ())
105 /* Copy until 'dst' is cache-line-aligned. */
106 for (; (uintptr_t) dst8 & (CHIP_L2_LINE_SIZE () - 1);
107 n -= sizeof (word_t))
108 *dst8++ = *src8++;
110 /* If copying to self, return. The test is cheap enough
111 that we do it despite the fact that the memcpy() contract
112 doesn't require us to support overlapping dst and src.
113 This is the most common case of overlap, and any close
114 overlap will cause corruption due to the wh64 below.
115 This case is particularly important since the compiler
116 will emit memcpy() calls for aggregate copies even if it
117 can't prove that src != dst. */
118 if (__builtin_expect (dst8 == src8, 0))
119 return dstv;
121 for (; n >= CHIP_L2_LINE_SIZE ();)
123 __insn_wh64 (dst8);
125 /* Prefetch and advance to next line to prefetch, but
126 don't go past the end. */
127 __insn_prefetch (prefetch);
128 prefetch += CHIP_L2_LINE_SIZE ();
129 prefetch = (prefetch > src1_end) ? prefetch :
130 (const char *) src8;
132 /* Copy an entire cache line. Manually unrolled to
133 avoid idiosyncracies of compiler unrolling. */
134 #define COPY_WORD(offset) ({ dst8[offset] = src8[offset]; n -= 8; })
135 COPY_WORD (0);
136 COPY_WORD (1);
137 COPY_WORD (2);
138 COPY_WORD (3);
139 COPY_WORD (4);
140 COPY_WORD (5);
141 COPY_WORD (6);
142 COPY_WORD (7);
143 #if CHIP_L2_LINE_SIZE() != 64
144 # error "Fix code that assumes particular L2 cache line size."
145 #endif
147 dst8 += CHIP_L2_LINE_SIZE () / sizeof (word_t);
148 src8 += CHIP_L2_LINE_SIZE () / sizeof (word_t);
152 for (; n >= sizeof (word_t); n -= sizeof (word_t))
153 *dst8++ = *src8++;
155 if (__builtin_expect (n == 0, 1))
156 return dstv;
158 final = *src8;
161 /* n != 0 if we get here. Write out any trailing bytes. */
162 dst1 = (char *) dst8;
163 #ifndef __BIG_ENDIAN__
164 if (n & 4)
166 *(uint32_t *) dst1 = final;
167 dst1 += 4;
168 final >>= 32;
169 n &= 3;
171 if (n & 2)
173 *(uint16_t *) dst1 = final;
174 dst1 += 2;
175 final >>= 16;
176 n &= 1;
178 if (n)
179 *(uint8_t *) dst1 = final;
180 #else
181 if (n & 4)
183 *(uint32_t *) dst1 = final >> 32;
184 dst1 += 4;
186 else
188 final >>= 32;
190 if (n & 2)
192 *(uint16_t *) dst1 = final >> 16;
193 dst1 += 2;
195 else
197 final >>= 16;
199 if (n & 1)
200 *(uint8_t *) dst1 = final >> 8;
201 #endif
203 return dstv;
205 weak_alias (__memcpy, memcpy)
206 libc_hidden_builtin_def (memcpy)