[official-gcc.git] / libitm / memcpy.cc

/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Transactional Memory Library (libitm).

   Libitm is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "libitm_i.h"

using namespace GTM;

static void
do_memcpy (uintptr_t idst, uintptr_t isrc, size_t size,
           abi_dispatch::lock_type W, abi_dispatch::lock_type R)
{
  abi_dispatch *disp = abi_disp();
  // The position in the destination cacheline where *IDST starts.
  uintptr_t dofs = idst & (CACHELINE_SIZE - 1);
  // The position in the source cacheline where *ISRC starts.
  uintptr_t sofs = isrc & (CACHELINE_SIZE - 1);
  const gtm_cacheline *src
    = reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);
  gtm_cacheline *dst
    = reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
  const gtm_cacheline *sline;
  abi_dispatch::mask_pair dpair;

  if (size == 0)
    return;
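
  // Note: each write_lock call returns a mask_pair, i.e. the shadow
  // cacheline to fill plus a byte mask in which we record exactly which
  // bytes of that line this copy writes, so partial-line stores can be
  // committed correctly.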

  // If both SRC and DST data start at the same position in the cachelines,
  // we can easily copy the data in tandem, cacheline by cacheline...
  if (dofs == sofs)
    {
      // We copy the data in three stages:

      // (a) Copy stray bytes at the beginning that are smaller than a
      // cacheline.
      if (sofs != 0)
        {
          size_t sleft = CACHELINE_SIZE - sofs;
          size_t min = (size <= sleft ? size : sleft);

          dpair = disp->write_lock(dst, W);
          sline = disp->read_lock(src, R);
          *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << sofs;
          memcpy (&dpair.line->b[sofs], &sline->b[sofs], min);
          dst++;
          src++;
          size -= min;
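
          // E.g., assuming a (hypothetical) 64-byte cacheline: sofs == 40
          // and size == 100 give sleft == 24 and min == 24, so the mask
          // gains bits 40..63 and the copy fills the last 24 bytes of the
          // first destination line.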
        }

      // (b) Copy subsequent cacheline sized chunks.
      while (size >= CACHELINE_SIZE)
        {
          dpair = disp->write_lock(dst, W);
          sline = disp->read_lock(src, R);
          *dpair.mask = -1;
          *dpair.line = *sline;
          dst++;
          src++;
          size -= CACHELINE_SIZE;
        }

      // (c) Copy anything left over.
      if (size != 0)
        {
          dpair = disp->write_lock(dst, W);
          sline = disp->read_lock(src, R);
          *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
          memcpy (dpair.line, sline, size);
        }
    }
  // ... otherwise, we must copy the data in disparate hunks using
  // temporary storage.
  else
    {
      gtm_cacheline c;
      size_t sleft = CACHELINE_SIZE - sofs;

      sline = disp->read_lock(src, R);

      // As above, we copy the data in three stages:

      // (a) Copy stray bytes at the beginning that are smaller than a
      // cacheline.
      if (dofs != 0)
        {
          size_t dleft = CACHELINE_SIZE - dofs;
          size_t min = (size <= dleft ? size : dleft);

          dpair = disp->write_lock(dst, W);
          *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;

          // If what's left in the source cacheline will fit in the
          // rest of the destination cacheline, straight up copy it.
          if (min <= sleft)
            {
              memcpy (&dpair.line->b[dofs], &sline->b[sofs], min);
              sofs += min;
            }
          // Otherwise, we need more bits from the source cacheline
          // than are available.  Piece together what we need from
          // contiguous (source) cachelines, into temp space, and copy
          // it over.
          else
            {
              memcpy (&c, &sline->b[sofs], sleft);
              sline = disp->read_lock(++src, R);
              sofs = min - sleft;
              memcpy (&c.b[sleft], sline, sofs);
              memcpy (&dpair.line->b[dofs], &c, min);
            }
          sleft = CACHELINE_SIZE - sofs;

          dst++;
          size -= min;
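
          // For instance, with a (hypothetical) 64-byte cacheline,
          // dofs == 48 and sofs == 56 give dleft == 16 and sleft == 8;
          // for a large SIZE, min == 16 > sleft, so 8 bytes come from the
          // end of the first source line and 8 more from the start of the
          // next before the reassembled 16 bytes land at offset 48 above.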
        }

      // (b) Copy subsequent cacheline sized chunks.
      while (size >= CACHELINE_SIZE)
        {
          // We have a full (destination) cacheline where to put the
          // data, but to get to the corresponding cacheline sized
          // chunk in the source, we have to piece together two
          // contiguous source cachelines.
          memcpy (&c, &sline->b[sofs], sleft);
          sline = disp->read_lock(++src, R);
          memcpy (&c.b[sleft], sline, sofs);

          dpair = disp->write_lock(dst, W);
          *dpair.mask = -1;
          *dpair.line = c;

          dst++;
          size -= CACHELINE_SIZE;
        }

      // (c) Copy anything left over.
      if (size != 0)
        {
          dpair = disp->write_lock(dst, W);
          *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
          // If what's left to copy is entirely in the remaining
          // source cacheline, do it.
          if (size <= sleft)
            memcpy (dpair.line, &sline->b[sofs], size);
          // Otherwise, piece together the remaining bits, and copy.
          else
            {
              memcpy (&c, &sline->b[sofs], sleft);
              sline = disp->read_lock(++src, R);
              memcpy (&c.b[sleft], sline, size - sleft);
              memcpy (dpair.line, &c, size);
            }
        }
    }
}

static void
do_memmove (uintptr_t idst, uintptr_t isrc, size_t size,
            abi_dispatch::lock_type W, abi_dispatch::lock_type R)
{
  abi_dispatch *disp = abi_disp();
  uintptr_t dleft, sleft, sofs, dofs;
  const gtm_cacheline *sline;
  abi_dispatch::mask_pair dpair;

  if (size == 0)
    return;

  /* The co-aligned memmove below doesn't work for DST == SRC, so filter
     that out.  It's tempting to just return here, as this is a no-op move.
     However, our caller has the right to expect the locks to be acquired
     as advertised.  */
  if (__builtin_expect (idst == isrc, 0))
    {
      /* If the write lock is already acquired, nothing to do.  */
      if (W == abi_dispatch::WaW)
        return;
      /* If the destination is protected, acquire a write lock.  */
      if (W != abi_dispatch::NOLOCK)
        R = abi_dispatch::RfW;
      /* Notice serial mode, where we don't acquire locks at all.  */
      if (R == abi_dispatch::NOLOCK)
        return;
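
      /* Reuse IDST as the end bound and read-lock every cacheline that
         overlaps [ISRC, ISRC + SIZE); no bytes move, but the locks the
         caller expects are acquired.  */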
      idst = isrc + size;
      for (isrc &= -CACHELINE_SIZE; isrc < idst; isrc += CACHELINE_SIZE)
        disp->read_lock(reinterpret_cast<const gtm_cacheline *>(isrc), R);
      return;
    }

  /* Fall back to memcpy if the implementation above can handle it.  */
  if (idst < isrc || isrc + size <= idst)
    {
      do_memcpy (idst, isrc, size, W, R);
      return;
    }

  /* What remains requires a backward copy from the end of the blocks.  */
  idst += size;
  isrc += size;
  dofs = idst & (CACHELINE_SIZE - 1);
  sofs = isrc & (CACHELINE_SIZE - 1);
  dleft = CACHELINE_SIZE - dofs;
  sleft = CACHELINE_SIZE - sofs;

  gtm_cacheline *dst
    = reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
  const gtm_cacheline *src
    = reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);
  if (dofs == 0)
    dst--;
  if (sofs == 0)
    src--;

  if (dofs == sofs)
    {
      /* Since DST and SRC are co-aligned, and we didn't use the memcpy
         optimization above, that implies that SIZE > CACHELINE_SIZE.  */
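      /* After the += SIZE adjustment above, the tail of the region
         occupies bytes [0, SOFS) of the final partial cacheline; copy
         that chunk first.  */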
      if (sofs != 0)
        {
          dpair = disp->write_lock(dst, W);
          sline = disp->read_lock(src, R);
          *dpair.mask |= ((gtm_cacheline_mask)1 << sofs) - 1;
          memcpy (dpair.line, sline, sofs);
          dst--;
          src--;
          size -= sofs;
        }

      while (size >= CACHELINE_SIZE)
        {
          dpair = disp->write_lock(dst, W);
          sline = disp->read_lock(src, R);
          *dpair.mask = -1;
          *dpair.line = *sline;
          dst--;
          src--;
          size -= CACHELINE_SIZE;
        }
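
      /* Any remainder is the head of the region: with a (hypothetical)
         64-byte cacheline, 20 leftover bytes mean the region began at
         offset 44 of its first line, so OFS below is 44 and the mask
         gains bits 44..63.  */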
      if (size != 0)
        {
          size_t ofs = CACHELINE_SIZE - size;
          dpair = disp->write_lock(dst, W);
          sline = disp->read_lock(src, R);
          *dpair.mask |= (((gtm_cacheline_mask)1 << size) - 1) << ofs;
          memcpy (&dpair.line->b[ofs], &sline->b[ofs], size);
        }
    }
  else
    {
      gtm_cacheline c;

      sline = disp->read_lock(src, R);
      if (dofs != 0)
        {
          size_t min = (size <= dofs ? size : dofs);

          if (min <= sofs)
            {
              sofs -= min;
              memcpy (&c, &sline->b[sofs], min);
            }
          else
            {
              size_t min_ofs = min - sofs;
              memcpy (&c.b[min_ofs], sline, sofs);
              sline = disp->read_lock(--src, R);
              sofs = CACHELINE_SIZE - min_ofs;
              memcpy (&c, &sline->b[sofs], min_ofs);
            }

          dofs -= min;
          dpair = disp->write_lock(dst, W);
          *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
          memcpy (&dpair.line->b[dofs], &c, min);

          sleft = CACHELINE_SIZE - sofs;
          dst--;
          size -= min;
        }

      while (size >= CACHELINE_SIZE)
        {
          memcpy (&c.b[sleft], sline, sofs);
          sline = disp->read_lock(--src, R);
          memcpy (&c, &sline->b[sofs], sleft);

          dpair = disp->write_lock(dst, W);
          *dpair.mask = -1;
          *dpair.line = c;

          dst--;
          size -= CACHELINE_SIZE;
        }

      if (size != 0)
        {
          dofs = CACHELINE_SIZE - size;

          memcpy (&c.b[sleft], sline, sofs);
          if (sleft > dofs)
            {
              sline = disp->read_lock(--src, R);
              memcpy (&c, &sline->b[sofs], sleft);
            }

          dpair = disp->write_lock(dst, W);
          *dpair.mask |= (gtm_cacheline_mask)-1 << dofs;
          memcpy (&dpair.line->b[dofs], &c.b[dofs], size);
        }
    }
}

#define ITM_MEM_DEF(NAME, READ, WRITE) \
void ITM_REGPARM _ITM_memcpy##NAME(void *dst, const void *src, size_t size) \
{ \
  do_memcpy ((uintptr_t)dst, (uintptr_t)src, size, \
             abi_dispatch::WRITE, abi_dispatch::READ); \
} \
void ITM_REGPARM _ITM_memmove##NAME(void *dst, const void *src, size_t size) \
{ \
  do_memmove ((uintptr_t)dst, (uintptr_t)src, size, \
              abi_dispatch::WRITE, abi_dispatch::READ); \
}
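
/* Instantiate the _ITM_memcpy* and _ITM_memmove* entry points.  The NAME
   suffix encodes the barriers used: e.g. RnWt takes no lock on the source
   (NOLOCK) and write-locks the destination with W, while RtaRWtaW uses
   the read-after-read and write-after-write variants.  */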

ITM_MEM_DEF(RnWt, NOLOCK, W)
ITM_MEM_DEF(RnWtaR, NOLOCK, WaR)
ITM_MEM_DEF(RnWtaW, NOLOCK, WaW)

ITM_MEM_DEF(RtWn, R, NOLOCK)
ITM_MEM_DEF(RtWt, R, W)
ITM_MEM_DEF(RtWtaR, R, WaR)
ITM_MEM_DEF(RtWtaW, R, WaW)

ITM_MEM_DEF(RtaRWn, RaR, NOLOCK)
ITM_MEM_DEF(RtaRWt, RaR, W)
ITM_MEM_DEF(RtaRWtaR, RaR, WaR)
ITM_MEM_DEF(RtaRWtaW, RaR, WaW)

ITM_MEM_DEF(RtaWWn, RaW, NOLOCK)
ITM_MEM_DEF(RtaWWt, RaW, W)
ITM_MEM_DEF(RtaWWtaR, RaW, WaR)
ITM_MEM_DEF(RtaWWtaW, RaW, WaW)