Work around old buggy program which cannot cope with memcpy semantics.
[glibc.git] / sysdeps / x86_64 / memcpy.S
Commit [+]AuthorDateLineData
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +00001/*
2 Optimized memcpy for x86-64.
3
4 Copyright (C) 2007 Free Software Foundation, Inc.
5 Contributed by Evandro Menezes <evandro.menezes@amd.com>, 2007.
6
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +00007 This file is part of the GNU C Library.
78df0fcb
AJ
Andreas Jaeger2002-08-31 17:45:33 +00008
9 The GNU C Library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
13
14 The GNU C Library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
18
19 You should have received a copy of the GNU Lesser General Public
20 License along with the GNU C Library; if not, write to the Free
21 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +000022 02111-1307 USA.
23*/
78df0fcb
AJ
Andreas Jaeger2002-08-31 17:45:33 +000024
25#include <sysdep.h>
26#include "asm-syntax.h"
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +000027
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +000028/* Stack slots in the red-zone. */
29
/* RETVAL is the displacement used in the RETVAL(%rsp) operands of memcpy.
   In the memcpy build it is -8, i.e. a slot just below %rsp in which the
   original destination pointer is parked.  In the mempcpy build every
   RETVAL(%rsp) access is preprocessed away, so the value 0 only serves as
   the base from which the SAVEn offsets below are derived.  */
30#ifdef USE_AS_MEMPCPY
31# define RETVAL (0)
32#else
33# define RETVAL (-8)
/* In a shared, non-multiarch libc build the routine is assembled under the
   name __memcpy, and libc_hidden_builtin_def is redefined so that it emits
   a global symbol __GI_memcpy equal to __memcpy.  */
0354e355
L
H.J. Lu2011-04-01 19:38:21 -040034# if defined SHARED && !defined USE_MULTIARCH && !defined NOT_IN_libc
35# define memcpy __memcpy
36# undef libc_hidden_builtin_def
37# define libc_hidden_builtin_def(name) \
38 .globl __GI_memcpy; __GI_memcpy = __memcpy
39# endif
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +000040#endif
/* SAVE0..SAVE3 are four consecutive 8-byte slots directly below RETVAL.
   The large-block loops in memcpy spill %r14, %r13, %r12 and %rbx into
   them before using those registers as copy temporaries.  */
41#define SAVE0 (RETVAL - 8)
42#define SAVE1 (SAVE0 - 8)
43#define SAVE2 (SAVE1 - 8)
44#define SAVE3 (SAVE2 - 8)
78df0fcb
AJ
Andreas Jaeger2002-08-31 17:45:33 +000045
46 .text
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +000047
/* __memcpy_chk -- checked entry point, assembled only for PIC code inside
   libc.  It compares %rcx with %rdx as unsigned values and branches to
   __chk_fail when %rcx is the smaller one.  No return or jump follows the
   check, so a passing call appears to run straight on into memcpy, which
   is defined immediately below.
   NOTE(review): the fall-through relies on how ENTRY_CHK, END_CHK and
   ENTRY expand (sysdep.h, not visible here) -- confirm there.
   NOTE(review): %rdx is memcpy's byte count; %rcx is presumably the size
   of the destination object supplied by the caller -- TODO confirm.  */
b5cc329c Ulrich Drepper2004-10-18 04:17:19 +000048#if defined PIC && !defined NOT_IN_libc
6fb8cbcb H.J. Lu2010-06-30 08:26:11 -070049ENTRY_CHK (__memcpy_chk)
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +000050
/* AT&T operand order: the flags reflect %rcx - %rdx, so jb is taken when
   %rcx < %rdx (unsigned).  */
b5cc329c
UD
Ulrich Drepper2004-10-18 04:17:19 +000051 cmpq %rdx, %rcx
52 jb HIDDEN_JUMPTARGET (__chk_fail)
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +000053
6fb8cbcb H.J. Lu2010-06-30 08:26:11 -070054END_CHK (__memcpy_chk)
b5cc329c Ulrich Drepper2004-10-18 04:17:19 +000055#endif
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +000056
/* memcpy (dst, src, n): on entry %rdi = dst, %rsi = src, %rdx = n (the
   first three integer argument registers of the System V AMD64 ABI).
   memcpy returns the original dst in %rax; when built with
   USE_AS_MEMPCPY the return value is instead %rdi after the copy, i.e.
   the address just past the last byte written.

   Counts below 32 are handled entirely by L(1) .. L(exit).  The same code
   is reused as the tail of every larger-block path: those paths mask %edx
   down to the leftover byte count (at most 127) and jump to L(1).  */
57ENTRY(memcpy) /* (void *, const void*, size_t) */
58
59/* Handle tiny blocks. */
60
/* The movq between the compare and the branch does not touch the flags,
   so jae still acts on n >= 32 (unsigned).  A count of exactly 32
   therefore takes the L(1after) path, not the tiny-block path.  */
61L(1try): /* up to 32B */
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +000062 cmpq $32, %rdx
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +000063#ifndef USE_AS_MEMPCPY
64 movq %rdi, %rax /* save return value */
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +000065#endif
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +000066 jae L(1after)
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +000067
/* Bits 0..3 of the count are consumed one at a time: each set bit copies
   1, 2, 4 or 8 bytes through %ecx/%rcx and advances both pointers.
   %rax is not written anywhere in L(1) .. L(1loop), so a return value
   already placed in %rax survives this tail.  */
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +000068L(1): /* 1-byte once */
69 testb $1, %dl
70 jz L(1a)
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +000071
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +000072 movzbl (%rsi), %ecx
73 movb %cl, (%rdi)
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +000074
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +000075 incq %rsi
76 incq %rdi
77
/* .p2align 4,, 4: pad to a 16-byte boundary, but only when that needs no
   more than 4 bytes of padding.  */
78 .p2align 4,, 4
79
80L(1a): /* 2-byte once */
81 testb $2, %dl
82 jz L(1b)
83
84 movzwl (%rsi), %ecx
85 movw %cx, (%rdi)
86
87 addq $2, %rsi
88 addq $2, %rdi
89
90 .p2align 4,, 4
91
92L(1b): /* 4-byte once */
93 testb $4, %dl
94 jz L(1c)
95
96 movl (%rsi), %ecx
97 movl %ecx, (%rdi)
98
99 addq $4, %rsi
100 addq $4, %rdi
101
102 .p2align 4,, 4
103
104L(1c): /* 8-byte once */
105 testb $8, %dl
106 jz L(1d)
107
108 movq (%rsi), %rcx
109 movq %rcx, (%rdi)
110
111 addq $8, %rsi
112 addq $8, %rdi
113
114 .p2align 4,, 4
115
/* Keep only bits 4..7 of the count: what is left to copy is now a
   multiple of 16.  Zero means the copy is complete.  */
116L(1d): /* 16-byte loop */
117 andl $0xf0, %edx
118 jz L(exit)
119
120 .p2align 4
121
/* 16 bytes per iteration through %rcx and %r8.  subl sets ZF; the two
   leaq instructions only do pointer arithmetic and leave the flags alone,
   so the jnz at the bottom tests the result of the subl.  */
122L(1loop):
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000123 movq (%rsi), %rcx
124 movq 8(%rsi), %r8
125 movq %rcx, (%rdi)
126 movq %r8, 8(%rdi)
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000127
128 subl $16, %edx
129
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000130 leaq 16(%rsi), %rsi
131 leaq 16(%rdi), %rdi
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000132
133 jnz L(1loop)
134
135 .p2align 4,, 4
136
/* mempcpy: return the advanced destination pointer.
   memcpy: %rax already holds the return value; the rep below is emitted
   as a prefix of the retq that follows.
   NOTE(review): presumably the two-byte "rep ret" form recommended for
   AMD processors when the return is a branch target -- the file does not
   say; confirm.  */
137L(exit): /* exit */
138#ifdef USE_AS_MEMPCPY
139 movq %rdi, %rax /* return value */
140#else
141 rep
142#endif
143 retq
144
145 .p2align 4
146
/* L(1after): reached from L(1try) when the count is 32 or more.  In the
   memcpy build the return value held in %rax is parked at RETVAL(%rsp),
   because %eax/%rax is used as a scratch register by the copy loops that
   follow; each exit path reloads it from the same slot.  */
147L(1after):
148#ifndef USE_AS_MEMPCPY
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000149 movq %rax, RETVAL(%rsp) /* save return value */
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000150#endif
151
152/* Align to the natural word size. */
153
/* %ecx = src & 7.  Zero means the source is already 8-byte aligned and
   the byte loop is skipped.  Only the source is aligned; the destination
   alignment is not examined.  */
154L(aligntry):
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000155 movl %esi, %ecx /* align by source */
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000156
157 andl $7, %ecx
158 jz L(alignafter) /* already aligned */
159
/* The loop below copies 8 - %ecx bytes, so the remaining count becomes
   n - (8 - %ecx) = n + %ecx - 8, computed with a single leaq (the upper
   half of %rcx is zero after the 32-bit operations above).  %ecx is then
   biased to %ecx - 8, a negative counter that incl brings up to zero.  */
160L(align): /* align */
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000161 leaq -8(%rcx, %rdx), %rdx /* calculate remaining bytes */
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000162 subl $8, %ecx
163
164 .p2align 4
165
/* One byte per iteration through %eax.  incl sets ZF; the leaq
   instructions leave the flags alone, so jnz tests the incl result.  */
166L(alignloop): /* 1-byte alignment loop */
167 movzbl (%rsi), %eax
168 movb %al, (%rdi)
169
170 incl %ecx
171
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000172 leaq 1(%rsi), %rsi
173 leaq 1(%rdi), %rdi
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +0000174
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000175 jnz L(alignloop)
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +0000176
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000177 .p2align 4
178
/* Mid-sized path: taken when the remaining count in %rdx is at most 1024
   bytes (ja sends anything larger to L(32after)).  Copies 32 bytes per
   step through %rax, %r8, %r9 and %r10, then hands any leftover of fewer
   than 32 bytes to L(1).  */
179L(alignafter):
180
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000181/* Handle mid-sized blocks. */
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000182
183L(32try): /* up to 1KB */
184 cmpq $1024, %rdx
185 ja L(32after)
186
/* %ecx = number of whole 32-byte chunks.  It can be zero, because the
   alignment loop may have reduced the count below 32.  */
187L(32): /* 32-byte loop */
188 movl %edx, %ecx
189 shrl $5, %ecx
190 jz L(32skip)
78df0fcb
AJ
Andreas Jaeger2002-08-31 17:45:33 +0000191
192 .p2align 4
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +0000193
/* The loop body is unrolled twice.  Each decl sets ZF early; the loads,
   stores and leaq that follow do not modify the flags, so the jz / jnz
   after each half still tests that half's decl.  The jz in the middle
   leaves the loop when the chunk count runs out after the first half.  */
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000194L(32loop):
195 decl %ecx
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +0000196
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000197 movq (%rsi), %rax
198 movq 8(%rsi), %r8
199 movq 16(%rsi), %r9
200 movq 24(%rsi), %r10
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000201
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000202 movq %rax, (%rdi)
203 movq %r8, 8(%rdi)
204 movq %r9, 16(%rdi)
205 movq %r10, 24(%rdi)
78df0fcb
AJ
Andreas Jaeger2002-08-31 17:45:33 +0000206
207 leaq 32(%rsi), %rsi
208 leaq 32(%rdi), %rdi
209
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000210 jz L(32skip) /* help out smaller blocks */
211
212 decl %ecx
213
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000214 movq (%rsi), %rax
215 movq 8(%rsi), %r8
216 movq 16(%rsi), %r9
217 movq 24(%rsi), %r10
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000218
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000219 movq %rax, (%rdi)
220 movq %r8, 8(%rdi)
221 movq %r9, 16(%rdi)
222 movq %r10, 24(%rdi)
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000223
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000224 leaq 32(%rsi), %rsi
225 leaq 32(%rdi), %rdi
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +0000226
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000227 jnz L(32loop)
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +0000228
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000229 .p2align 4
230
/* %edx = leftover bytes (count mod 32); the andl sets ZF.
   mempcpy: jump to L(1) for the leftover, else return %rdi.
   memcpy: the saved return value is reloaded first (movq leaves ZF
   intact), so %rax is already correct whether control goes to L(1) or
   returns here.  */
231L(32skip):
232 andl $31, %edx /* check for left overs */
233#ifdef USE_AS_MEMPCPY
234 jnz L(1)
235
236 movq %rdi, %rax
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +0000237#else
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000238 movq RETVAL(%rsp), %rax
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000239 jnz L(1)
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000240
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000241 rep
242#endif
243 retq /* exit */
244
245 .p2align 4
246
/* L(32after): reached when more than 1024 bytes remain.  This phase
   copies with rep movsq.  Inside libc it moves at most
   __x86_64_data_cache_size_half bytes and passes any larger remainder on
   to L(fastafter); when NOT_IN_libc is defined it copies the whole
   remaining count and is the last algorithm in the function.  */
247L(32after):
248
249/*
250 In order to minimize code-size in RTLD, algorithms specific for
251 larger blocks are excluded when building for RTLD.
252*/
253
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000254/* Handle blocks smaller than 1/2 L1. */
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000255
/* cmpq computes %r11 - %rdx; cmova replaces %r11 with %rdx when %r11 is
   the larger (unsigned), leaving %r11 = min (remaining bytes, limit).  */
256L(fasttry): /* first 1/2 L1 */
257#ifndef NOT_IN_libc /* only up to this algorithm outside of libc.so */
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000258 movq __x86_64_data_cache_size_half(%rip), %r11
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000259 cmpq %rdx, %r11 /* calculate the smaller of */
260 cmovaq %rdx, %r11 /* remaining bytes and 1/2 L1 */
261#endif
262
/* %rcx = number of 8-byte words to move.  In the libc build %r11 is
   rounded down to a multiple of 8 so that it equals the number of bytes
   rep movsq will actually copy.  */
263L(fast): /* good ol' MOVS */
264#ifndef NOT_IN_libc
265 movq %r11, %rcx
266 andq $-8, %r11
267#else
268 movq %rdx, %rcx
269#endif
270 shrq $3, %rcx
271 jz L(fastskip)
272
/* rep movsq copies %rcx quadwords from (%rsi) to (%rdi), advancing both
   pointers.  NOTE(review): assumes the direction flag is clear on entry,
   as the calling convention requires -- not checked here.  */
273 rep
274 movsq
275
276 .p2align 4,, 4
277
/* libc build: subtract the bytes just copied; testq $-8 is non-zero when
   8 or more bytes are still outstanding, in which case the next
   algorithm takes over.  Otherwise at most 7 bytes remain and are passed
   to L(1), with the return value set up as in the other exit paths.  */
278L(fastskip):
279#ifndef NOT_IN_libc
280 subq %r11, %rdx /* check for more */
281 testq $-8, %rdx
282 jnz L(fastafter)
283#endif
284
285 andl $7, %edx /* check for left overs */
286#ifdef USE_AS_MEMPCPY
287 jnz L(1)
288
289 movq %rdi, %rax
290#else
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000291 movq RETVAL(%rsp), %rax
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000292 jnz L(1)
293
294 rep
295#endif
296 retq /* exit */
297
/* Everything from here to the matching #endif just before END(memcpy) is
   assembled only inside libc.  This first part sets up the 64-byte
   prefetching copy: it bounds the amount to copy, spills four
   callee-saved registers and selects one of two loop variants.  */
298#ifndef NOT_IN_libc /* none of the algorithms below for RTLD */
299
300 .p2align 4
301
302L(fastafter):
303
304/* Handle large blocks smaller than 1/2 L2. */
305
/* %r8 = min (remaining bytes, __x86_64_shared_cache_size_half), using the
   same cmp/cmova pattern as L(fasttry).  */
306L(pretry): /* first 1/2 L2 */
307 movq __x86_64_shared_cache_size_half (%rip), %r8
308 cmpq %rdx, %r8 /* calculate the lesser of */
309 cmovaq %rdx, %r8 /* remaining bytes and 1/2 L2 */
310
/* %rcx = number of 64-byte blocks; %r8 is rounded down to the number of
   bytes those blocks cover and is subtracted from %rdx at L(preskip).
   The jz acts on the result of the shrq.  */
311L(pre): /* 64-byte with prefetching */
312 movq %r8, %rcx
313 andq $-64, %r8
314 shrq $6, %rcx
315 jz L(preskip)
316
/* The loops use %rbx, %r12, %r13 and %r14 as copy temporaries, so their
   values are spilled to the SAVEn slots first.  Each store is paired
   with a cfi_rel_offset annotation naming the register and its slot.  */
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000317 movq %r14, SAVE0(%rsp)
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000318 cfi_rel_offset (%r14, SAVE0)
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000319 movq %r13, SAVE1(%rsp)
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000320 cfi_rel_offset (%r13, SAVE1)
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000321 movq %r12, SAVE2(%rsp)
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000322 cfi_rel_offset (%r12, SAVE2)
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000323 movq %rbx, SAVE3(%rsp)
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000324 cfi_rel_offset (%rbx, SAVE3)
325
/* A zero __x86_64_prefetchw selects L(preloop); any other value falls
   through to L(prewloop), which uses the prefetchw instruction.  */
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000326 cmpl $0, __x86_64_prefetchw(%rip)
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000327 jz L(preloop) /* check if PREFETCHW OK */
328
/* L(prewloop): 64 bytes per half-iteration, unrolled twice, with %rcx
   counting the 64-byte blocks still to copy.  The first half prefetches
   the source 896 and 960 bytes ahead with prefetcht0; the second half
   prefetches the destination 832 and 896 bytes ahead with prefetchw.
   Each decq sets ZF at the top of its half; the loads, stores, prefetches
   and leaq that follow do not change the flags, so the jz / jnz at the
   end of each half tests that decq.  %r8 is deliberately not used as a
   temporary here (%rbx is), because L(preskip) still needs it.  */
329 .p2align 4
330
331/* ... when PREFETCHW is available (less cache-probe traffic in MP systems). */
332
333L(prewloop): /* cache-line in state M */
334 decq %rcx
335
336 movq (%rsi), %rax
337 movq 8 (%rsi), %rbx
338 movq 16 (%rsi), %r9
339 movq 24 (%rsi), %r10
340 movq 32 (%rsi), %r11
341 movq 40 (%rsi), %r12
342 movq 48 (%rsi), %r13
343 movq 56 (%rsi), %r14
344
345 prefetcht0 0 + 896 (%rsi)
346 prefetcht0 64 + 896 (%rsi)
347
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000348 movq %rax, (%rdi)
349 movq %rbx, 8(%rdi)
350 movq %r9, 16(%rdi)
351 movq %r10, 24(%rdi)
352 movq %r11, 32(%rdi)
353 movq %r12, 40(%rdi)
354 movq %r13, 48(%rdi)
355 movq %r14, 56(%rdi)
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000356
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000357 leaq 64(%rsi), %rsi
358 leaq 64(%rdi), %rdi
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000359
/* Block count exhausted after the first half: leave the loop.  */
360 jz L(prebail)
361
362 decq %rcx
363
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000364 movq (%rsi), %rax
365 movq 8(%rsi), %rbx
366 movq 16(%rsi), %r9
367 movq 24(%rsi), %r10
368 movq 32(%rsi), %r11
369 movq 40(%rsi), %r12
370 movq 48(%rsi), %r13
371 movq 56(%rsi), %r14
372
373 movq %rax, (%rdi)
374 movq %rbx, 8(%rdi)
375 movq %r9, 16(%rdi)
376 movq %r10, 24(%rdi)
377 movq %r11, 32(%rdi)
378 movq %r12, 40(%rdi)
379 movq %r13, 48(%rdi)
380 movq %r14, 56(%rdi)
381
382 prefetchw 896 - 64(%rdi)
383 prefetchw 896 - 0(%rdi)
384
385 leaq 64(%rsi), %rsi
386 leaq 64(%rdi), %rdi
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000387
/* The explicit jmp skips over L(preloop), which is laid out between this
   loop and L(prebail).  */
388 jnz L(prewloop)
389 jmp L(prebail)
390
/* L(preloop): same structure as L(prewloop) -- 64 bytes per half,
   unrolled twice, %rcx counting 64-byte blocks -- but the destination
   prefetch in the second half uses prefetcht0 instead of prefetchw.
   The first half prefetches the source 896 and 960 bytes ahead, the
   second half the destination 832 and 896 bytes ahead.  As in the other
   loops, each jz / jnz consumes the ZF produced by the decq at the top of
   its half; nothing in between modifies the flags.  When the final jnz
   is not taken, control falls through into L(prebail).  */
391 .p2align 4
392
393/* ... when PREFETCHW is not available. */
394
395L(preloop): /* cache-line in state E */
396 decq %rcx
397
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000398 movq (%rsi), %rax
399 movq 8(%rsi), %rbx
400 movq 16(%rsi), %r9
401 movq 24(%rsi), %r10
402 movq 32(%rsi), %r11
403 movq 40(%rsi), %r12
404 movq 48(%rsi), %r13
405 movq 56(%rsi), %r14
406
407 prefetcht0 896 + 0(%rsi)
408 prefetcht0 896 + 64(%rsi)
409
410 movq %rax, (%rdi)
411 movq %rbx, 8(%rdi)
412 movq %r9, 16(%rdi)
413 movq %r10, 24(%rdi)
414 movq %r11, 32(%rdi)
415 movq %r12, 40(%rdi)
416 movq %r13, 48(%rdi)
417 movq %r14, 56(%rdi)
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000418
419 leaq 64 (%rsi), %rsi
420 leaq 64 (%rdi), %rdi
421
/* Block count exhausted after the first half: leave the loop.  */
422 jz L(prebail)
423
424 decq %rcx
425
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000426 movq (%rsi), %rax
427 movq 8(%rsi), %rbx
428 movq 16(%rsi), %r9
429 movq 24(%rsi), %r10
430 movq 32(%rsi), %r11
431 movq 40(%rsi), %r12
432 movq 48(%rsi), %r13
433 movq 56(%rsi), %r14
434
435 prefetcht0 896 - 64(%rdi)
436 prefetcht0 896 - 0(%rdi)
437
438 movq %rax, (%rdi)
439 movq %rbx, 8(%rdi)
440 movq %r9, 16(%rdi)
441 movq %r10, 24(%rdi)
442 movq %r11, 32(%rdi)
443 movq %r12, 40(%rdi)
444 movq %r13, 48(%rdi)
445 movq %r14, 56(%rdi)
446
447 leaq 64(%rsi), %rsi
448 leaq 64(%rdi), %rdi
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000449
450 jnz L(preloop)
451
/* L(prebail): common exit of both prefetching loops.  Reloads %rbx, %r12,
   %r13 and %r14 from the SAVEn slots they were spilled to before the
   loops, with a cfi_restore annotation after each reload.  */
452L(prebail):
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000453 movq SAVE3(%rsp), %rbx
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000454 cfi_restore (%rbx)
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000455 movq SAVE2(%rsp), %r12
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000456 cfi_restore (%r12)
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000457 movq SAVE1(%rsp), %r13
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000458 cfi_restore (%r13)
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000459 movq SAVE0(%rsp), %r14
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000460 cfi_restore (%r14)
461
462/* .p2align 4 */
463
/* L(preskip): also entered directly from L(pre) when there was no whole
   64-byte block to copy (no registers were spilled in that case).
   %r8 still holds the byte count handled by the prefetching loops;
   subtract it from the remaining count.  testq $-64 is non-zero when 64
   or more bytes are still outstanding, which sends control on to
   L(preafter).  Otherwise at most 63 bytes remain and go to L(1), with
   the return value set up as in the other exit paths.  */
464L(preskip):
465 subq %r8, %rdx /* check for more */
466 testq $-64, %rdx
467 jnz L(preafter)
468
469 andl $63, %edx /* check for left overs */
470#ifdef USE_AS_MEMPCPY
471 jnz L(1)
472
473 movq %rdi, %rax
474#else
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000475 movq RETVAL(%rsp), %rax
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000476 jnz L(1)
477
478 rep
479#endif
480 retq /* exit */
481
482 .p2align 4
483
/* L(preafter): reached from L(preskip) when 64 or more bytes remain after
   the prefetching loops.  The rest is copied 128 bytes at a time with
   non-temporal stores (movntiq); whatever is left below 128 bytes is
   passed to L(1).  */
484L(preafter):
485
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000486/* Handle huge blocks. */
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000487
488L(NTtry):
489
/* %rcx = number of whole 128-byte blocks; it is zero when between 64 and
   127 bytes remain, in which case the loop is skipped.  */
490L(NT): /* non-temporal 128-byte */
491 movq %rdx, %rcx
492 shrq $7, %rcx
493 jz L(NTskip)
494
/* Only %r12, %r13 and %r14 are spilled for this loop: it uses %r8 where
   the prefetching loops used %rbx, so %rbx is not touched.  */
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000495 movq %r14, SAVE0(%rsp)
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000496 cfi_rel_offset (%r14, SAVE0)
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000497 movq %r13, SAVE1(%rsp)
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000498 cfi_rel_offset (%r13, SAVE1)
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000499 movq %r12, SAVE2(%rsp)
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000500 cfi_rel_offset (%r12, SAVE2)
501
502 .p2align 4
503
/* Each iteration prefetches the source 768 and 832 bytes ahead with
   prefetchnta, then moves two 64-byte halves: eight loads followed by
   eight movntiq stores each.  decq sets ZF near the top; none of the
   loads, movntiq stores or leaq instructions after it change the flags,
   so the jnz at the bottom tests that decq.  %rdx is not updated inside
   the loop; the leftover is derived from it at L(NTskip).  */
504L(NTloop):
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000505 prefetchnta 768(%rsi)
506 prefetchnta 832(%rsi)
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000507
508 decq %rcx
509
0435403c
UD
Ulrich Drepper2007-09-22 05:54:03 +0000510 movq (%rsi), %rax
511 movq 8(%rsi), %r8
512 movq 16(%rsi), %r9
513 movq 24(%rsi), %r10
514 movq 32(%rsi), %r11
515 movq 40(%rsi), %r12
516 movq 48(%rsi), %r13
517 movq 56(%rsi), %r14
518
519 movntiq %rax, (%rdi)
520 movntiq %r8, 8(%rdi)
521 movntiq %r9, 16(%rdi)
522 movntiq %r10, 24(%rdi)
523 movntiq %r11, 32(%rdi)
524 movntiq %r12, 40(%rdi)
525 movntiq %r13, 48(%rdi)
526 movntiq %r14, 56(%rdi)
527
528 movq 64(%rsi), %rax
529 movq 72(%rsi), %r8
530 movq 80(%rsi), %r9
531 movq 88(%rsi), %r10
532 movq 96(%rsi), %r11
533 movq 104(%rsi), %r12
534 movq 112(%rsi), %r13
535 movq 120(%rsi), %r14
536
537 movntiq %rax, 64(%rdi)
538 movntiq %r8, 72(%rdi)
539 movntiq %r9, 80(%rdi)
540 movntiq %r10, 88(%rdi)
541 movntiq %r11, 96(%rdi)
542 movntiq %r12, 104(%rdi)
543 movntiq %r13, 112(%rdi)
544 movntiq %r14, 120(%rdi)
545
546 leaq 128(%rsi), %rsi
547 leaq 128(%rdi), %rdi
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000548
549 jnz L(NTloop)
550
/* Executed once after the loop, before any ordinary stores from the
   L(1) tail and before returning to the caller.  */
551 sfence /* serialize memory stores */
552
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000553 movq SAVE2(%rsp), %r12
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000554 cfi_restore (%r12)
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000555 movq SAVE1(%rsp), %r13
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000556 cfi_restore (%r13)
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000557 movq SAVE0(%rsp), %r14
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000558 cfi_restore (%r14)
559
/* %edx = leftover bytes (count mod 128); handled by L(1) when non-zero.
   The return value is set up exactly as in the other exit paths.  */
560L(NTskip):
561 andl $127, %edx /* check for left overs */
562#ifdef USE_AS_MEMPCPY
563 jnz L(1)
564
565 movq %rdi, %rax
566#else
0435403c Ulrich Drepper2007-09-22 05:54:03 +0000567 movq RETVAL(%rsp), %rax
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000568 jnz L(1)
569
570 rep
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +0000571#endif
bfe6f5fa
UD
Ulrich Drepper2007-05-21 19:21:48 +0000572 retq /* exit */
573
574#endif /* !NOT_IN_libc */
575
576END(memcpy)
78df0fcb Andreas Jaeger2002-08-31 17:45:33 +0000577
/* Symbol bookkeeping for the memcpy (not mempcpy) build.
   libc_hidden_builtin_def is invoked first; in the shared, non-multiarch
   libc configuration it was redefined near the top of this file to emit
   the __GI_memcpy alias.  Under that same configuration the memcpy ->
   __memcpy rename is then undone and versioned_symbol is invoked with
   (libc, __memcpy, memcpy, GLIBC_2_14).
   NOTE(review): presumably this exports __memcpy as the default memcpy
   at symbol version GLIBC_2.14; versioned_symbol comes from
   shlib-compat.h, which is not visible here -- confirm.  */
bfe6f5fa Ulrich Drepper2007-05-21 19:21:48 +0000578#ifndef USE_AS_MEMPCPY
85dd1003 Ulrich Drepper2003-04-29 22:49:58 +0000579libc_hidden_builtin_def (memcpy)
0354e355
L
H.J. Lu2011-04-01 19:38:21 -0400580# if defined SHARED && !defined USE_MULTIARCH && !defined NOT_IN_libc
581# undef memcpy
582# include <shlib-compat.h>
583versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14);
584# endif
3dbfd811 Ulrich Drepper2004-05-28 06:56:51 +0000585#endif