2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
32 /* Handle small chunks and last block of less than 32 bytes. */
37 movzbl (%rdi, %rsi), %edx
47 movzwl (%rdi, %rsi), %edx
57 movl (%rdi, %rsi), %edx
67 movq (%rdi, %rsi), %rdx
75 movdqu (%rdi, %rsi), %xmm0
82 leaq (%rdi, %rcx), %rcx
84 movzbl (%rsi, %rcx), %edx
117 /* For blocks bigger than 32 bytes
118 1. Advance one of the addr pointers to be 16B aligned.
119 2. Treat the case of both addr pointers aligned to 16B
120 separately to avoid movdqu.
121 3. Handle any block of more than 64 consecutive bytes by
122 unrolling, to reduce branches.
123 4. At least one addr pointer is 16B aligned, so use the memory-operand form of pcmpeqb. */
134 /* Both pointers may be misaligned. */
136 movdqu (%rdi, %rsi), %xmm0
142 leaq 16(%rdi, %r8), %rdi
144 /* Handle two 16B aligned pointers separately. */
149 movdqu (%rdi, %rsi), %xmm0
150 pcmpeqb (%rdi), %xmm0
160 /* Pre-unroll to be ready for unrolled 64B loop. */
163 movdqu (%rdi,%rsi), %xmm0
164 pcmpeqb (%rdi), %xmm0
170 movdqu (%rdi,%rsi), %xmm0
171 pcmpeqb (%rdi), %xmm0
184 movdqu (%rdi,%rsi), %xmm0
185 pcmpeqb (%rdi), %xmm0
191 movdqu (%rdi,%rsi), %xmm0
192 pcmpeqb (%rdi), %xmm0
198 movdqu (%rdi,%rsi), %xmm0
199 pcmpeqb (%rdi), %xmm0
205 movdqu (%rdi,%rsi), %xmm0
206 pcmpeqb (%rdi), %xmm0
222 movdqu (%rdi,%rsi), %xmm0
223 pcmpeqb (%rdi), %xmm0
229 movdqu (%rdi,%rsi), %xmm0
230 pcmpeqb (%rdi), %xmm0
247 movzbl (%rdi, %rcx), %eax
249 movzbl (%rsi,%rcx), %edx
261 movdqa (%rdi,%rsi), %xmm0
262 pcmpeqb (%rdi), %xmm0
276 movdqa (%rdi,%rsi), %xmm0
277 pcmpeqb (%rdi), %xmm0
283 movdqa (%rdi,%rsi), %xmm0
284 pcmpeqb (%rdi), %xmm0
295 movdqa (%rdi,%rsi), %xmm0
296 pcmpeqb (%rdi), %xmm0
302 movdqa (%rdi,%rsi), %xmm0
303 pcmpeqb (%rdi), %xmm0
309 movdqa (%rdi,%rsi), %xmm0
310 pcmpeqb (%rdi), %xmm0
316 movdqa (%rdi,%rsi), %xmm0
317 pcmpeqb (%rdi), %xmm0
331 movdqa (%rdi,%rsi), %xmm0
332 pcmpeqb (%rdi), %xmm0
338 movdqa (%rdi,%rsi), %xmm0
339 pcmpeqb (%rdi), %xmm0
352 /* Align to 16 bytes to improve instruction fetch. */
/* NOTE(review): both macros are defined outside this view.  Presumably
   weak_alias makes bcmp a weak alias of memcmp, and
   libc_hidden_builtin_def emits the hidden (library-internal) definition
   of memcmp -- confirm against the macro definitions.  */
357 weak_alias (memcmp, bcmp)
358 libc_hidden_builtin_def (memcmp)