2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/util/etch-helpers.h"
19 #if defined(__x86_64__)
20 .file "hphp/util/memcpy-x64.S"
24 * _memcpy_short is a local helper used when length < 8. It cannot be called
25 * from outside, because it expects a non-standard calling convention:
27 * %rax: destination buffer address.
28 * %rsi: source buffer address.
29 * %rdx: length, in the range of [0, 7]
// Short-copy helper. Per the calling convention documented in the header
// comment, dst arrives in %rax (not %rdi), src in %rsi and length in %rdx.
31 ETCH_TYPE(ETCH_NAME(_memcpy_short), @function)
32 ETCH_NAME(_memcpy_short):
35 // if (length == 0) return;
39 // We can safely read a byte here.
41 // if (length - 4 < 0) goto S4 ;
45 mov (%rsi, %rdx), %edi  // 32-bit load from %rsi + %rdx; %edi is free as scratch because dst is in %rax
47 mov %edi, (%rax, %rdx)  // 32-bit store of that value to %rax + %rdx
54 // At this point, length can be 1 or 2 or 3, and %cl contains
57 // if (length - 4 + 2 < 0) return;
61 // length is 2 or 3 here. In either case, just copy the last
63 movzwl (%rsi, %rdx), %ecx  // zero-extending 16-bit load from %rsi + %rdx into %ecx
68 ETCH_SIZE(_memcpy_short)
72 * void* _memcpy8(void* dst, void* src, size_t length);
75 * memcpy(dst, src, (length + 7) / 8 * 8)
76 * except that it returns dst + length instead of dst. It always copies 8-byte
77 * groups, and could overrun the buffers. If both src and dst start at
78 * addresses aligned to an 8-byte boundary, it is generally safe.
// Exported entry point for the 8-byte-group copy.
81 .globl ETCH_NAME(_memcpy8)
82 ETCH_TYPE(ETCH_NAME(_memcpy8), @function)
85 lea (%rdi, %rdx), %rax  // %rax = %rdi + %rdx, i.e. dst + length, the documented return value; lea leaves flags untouched
96 * void* memcpy(void* dst, void* src, size_t length);
// Exported general-purpose entry point. Arguments follow the C prototype
// in the comment preceding this block: dst in %rdi, src in %rsi, length in %rdx.
99 .globl ETCH_NAME(_hhvm_memcpy)
100 ETCH_TYPE(ETCH_NAME(_hhvm_memcpy), @function)
101 ETCH_NAME(_hhvm_memcpy):
109 mov -8(%rsi, %rdx), %r8  // %r8 = the 8 bytes ending at %rsi + %rdx
112 // This stores garbage if coming from _memcpy8, but it won't
113 // cause correctness problems. The address is writable, and the
114 // data there will be overwritten later. I don't want to move
115 // this store before L8 because that will slow down the
116 // loading of (%rsi).
117 mov %r8, -8(%rdi, %rdx)  // store those 8 bytes so they end at %rdi + %rdx
126 movdqu (%rsi, %rcx), %xmm1  // unaligned 16-byte load from %rsi + %rcx
127 movdqu %xmm1, (%rdi, %rcx)  // unaligned 16-byte store to %rdi + %rcx
129 // Test if there are 32-byte groups
133 jnz ETCH_LABEL(R32_adjDI)  // NOTE(review): depends on ZF from an earlier test; confirm which instruction sets it
139 ETCH_LABEL(R32_adjDI):
142 * void* _bcopy32(void* dst, void* src, size_t length);
144 * This is the same as
145 * assert(length >= 32);
146 * memcpy(dst, src, length / 32 * 32);
147 * except that the return value cannot be used.
// Exported entry point for copying whole 32-byte groups.
149 .globl ETCH_NAME(_bcopy32)
150 ETCH_TYPE(ETCH_NAME(_bcopy32), @function)
153 // Multiples of 32 bytes.
// L32_16read is also a jump target of the jz in the _memcpy16 entry path.
155 ETCH_LABEL(L32_16read):
156 movdqu 16(%rsi), %xmm1  // preload source bytes 16..31 before branching
158 // if ((rdx & 32) == 0) goto R64Byte_32read
160 jnc ETCH_LABEL(R64Byte_32read)  // %xmm1 stays live across this branch; NOTE(review): confirm which instruction sets CF
163 movdqu %xmm1, 16(%rdi)  // store bytes 16..31 to the destination
165 jnz ETCH_LABEL(R64_adjDI)  // NOTE(review): depends on ZF from an earlier instruction; movdqu does not modify flags
168 ETCH_LABEL(R64_adjDI):
172 * void _bcopy_in_64(void* dst, void* src, size_t lengthIn64Bytes);
174 * This is the same as
175 * assert(lengthIn64Bytes > 0);
176 * memcpy(dst, src, 64 * lengthIn64Bytes);
177 * except that the return value cannot be used.
179 * Note that the length being copied is 64 * %rdx.
// Exported entry point for copying whole 64-byte groups; per the comment
// preceding this block, %rdx counts 64-byte groups rather than bytes.
181 .globl ETCH_NAME(_bcopy_in_64)
182 ETCH_TYPE(ETCH_NAME(_bcopy_in_64), @function)
183 ETCH_NAME(_bcopy_in_64):
185 // Multiples of 64 bytes.
187 movdqu 16(%rsi), %xmm1
// Mid-body entry: code that jumps to R64Byte_32read has already loaded
// 16(%rsi) into %xmm1, so only the upper 32 bytes are read from here on.
188 ETCH_LABEL(R64Byte_32read):
189 movdqu 32(%rsi), %xmm2
190 movdqu 48(%rsi), %xmm3
// Stores are issued after the loads above; the sources are read into
// registers before the matching destination offsets are written.
193 movdqu %xmm1, 16(%rdi)
194 movdqu %xmm2, 32(%rdi)
195 movdqu %xmm3, 48(%rdi)
198 jnz ETCH_LABEL(R64Byte)  // NOTE(review): loop back-edge; presumably ZF comes from a counter update -- confirm
202 * void* _memcpy16(void* dst, void* src, size_t length);
204 * This is the same as
205 * assert(length % 16 == 0 && length > 0);
206 * memcpy(dst, src, length);
// Exported entry point for lengths that are a non-zero multiple of 16
// (see the assert in the comment preceding this block).
208 .globl ETCH_NAME(_memcpy16)
209 ETCH_TYPE(ETCH_NAME(_memcpy16), @function)
210 ETCH_NAME(_memcpy16):
211 movdqu -16(%rsi, %rdx), %xmm3  // %xmm3 = the 16 bytes ending at %rsi + %rdx
215 // if ((length & 16) == 0), it must be at least 32 bytes.
217 jz ETCH_LABEL(L32_16read)
219 movdqu %xmm3, -16(%rdi, %rdx)  // store the 16 bytes so they end at %rdi + %rdx
221 jz ETCH_LABEL(32_OR_0)
223 // We have at least 64 bytes remaining. CF indicates whether
224 // we need to copy 32 bytes first.
225 movdqu 16(%rsi), %xmm1  // movdqu leaves flags intact, so CF is still valid for the jnc below
226 jnc ETCH_LABEL(R64Byte_32read)
228 // Need to copy another 32 bytes and adjust rdi/rsi
231 movdqu %xmm1, 16(%rdi)
233 jmp ETCH_LABEL(R64Byte)
236 // The path for length == 16 comes through several conditional
237 // jumps. We expect (and should make) such situations rare
238 // when this is invoked.
239 jnc ETCH_LABEL(END16)
240 movdqu 16(%rsi), %xmm1
242 movdqu %xmm1, 16(%rdi)
// NOTE(review): the size directive names _hhvm_memcpy, so presumably every
// entry point since that label belongs to the one symbol extent -- confirm
// against the ETCH_SIZE definition in etch-helpers.h.
249 ETCH_SIZE(_hhvm_memcpy)
251 // When AVX2 is enabled, we use folly's memcpy instead of _hhvm_memcpy.
// Export memcpy as an alias of one of the implementations.
252 .globl ETCH_NAME(memcpy)
// NOTE(review): the two assignments below are presumably alternatives chosen
// by a preprocessor conditional on AVX2 support -- confirm that exactly one
// of them is assembled in any given build.
254 ETCH_NAME(memcpy) = ETCH_NAME(_folly_memcpy)
256 ETCH_NAME(memcpy) = ETCH_NAME(_hhvm_memcpy)
259 .ident "GCC: (GNU) 4.8.2"
// Empty .note.GNU-stack section with no flags: tells the GNU linker that this
// object does not need an executable stack.
261 .section .note.GNU-stack,"",@progbits