2 Copyright (C) 2009 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
33 /* Handle small chunks and last block of less than 32 bytes. */
38 movzbl (%rdi, %rsi), %edx
48 movzwl (%rdi, %rsi), %edx
58 movl (%rdi, %rsi), %edx
68 movq (%rdi, %rsi), %rdx
76 movdqu (%rdi, %rsi), %xmm0
83 leaq (%rdi, %rcx), %rcx
85 movzbl (%rsi, %rcx), %edx
118 /* For blocks bigger than 32 bytes
119 1. Advance one of the addr pointers to be 16B aligned.
120 2. Treat the case of both addr pointers aligned to 16B
121 separately to avoid movdqu.
122 3. Handle any block of more than 64 consecutive bytes with
123 unrolling to reduce branches.
124 4. At least one addr pointer is 16B aligned, use memory version
135 /* Both pointers may be misaligned. */
137 movdqu (%rdi, %rsi), %xmm0
143 leaq 16(%rdi, %r8), %rdi
145 /* Handle two 16B aligned pointers separately. */
150 movdqu (%rdi, %rsi), %xmm0
151 pcmpeqb (%rdi), %xmm0
161 /* Pre-unroll to be ready for unrolled 64B loop. */
164 movdqu (%rdi,%rsi), %xmm0
165 pcmpeqb (%rdi), %xmm0
171 movdqu (%rdi,%rsi), %xmm0
172 pcmpeqb (%rdi), %xmm0
185 movdqu (%rdi,%rsi), %xmm0
186 pcmpeqb (%rdi), %xmm0
192 movdqu (%rdi,%rsi), %xmm0
193 pcmpeqb (%rdi), %xmm0
199 movdqu (%rdi,%rsi), %xmm0
200 pcmpeqb (%rdi), %xmm0
206 movdqu (%rdi,%rsi), %xmm0
207 pcmpeqb (%rdi), %xmm0
223 movdqu (%rdi,%rsi), %xmm0
224 pcmpeqb (%rdi), %xmm0
230 movdqu (%rdi,%rsi), %xmm0
231 pcmpeqb (%rdi), %xmm0
248 movzbl (%rdi, %rcx), %eax
250 movzbl (%rsi,%rcx), %edx
262 movdqa (%rdi,%rsi), %xmm0
263 pcmpeqb (%rdi), %xmm0
277 movdqa (%rdi,%rsi), %xmm0
278 pcmpeqb (%rdi), %xmm0
284 movdqa (%rdi,%rsi), %xmm0
285 pcmpeqb (%rdi), %xmm0
296 movdqa (%rdi,%rsi), %xmm0
297 pcmpeqb (%rdi), %xmm0
303 movdqa (%rdi,%rsi), %xmm0
304 pcmpeqb (%rdi), %xmm0
310 movdqa (%rdi,%rsi), %xmm0
311 pcmpeqb (%rdi), %xmm0
317 movdqa (%rdi,%rsi), %xmm0
318 pcmpeqb (%rdi), %xmm0
332 movdqa (%rdi,%rsi), %xmm0
333 pcmpeqb (%rdi), %xmm0
339 movdqa (%rdi,%rsi), %xmm0
340 pcmpeqb (%rdi), %xmm0
353 /* Align to 16 bytes to improve instruction fetch. */
358 weak_alias (memcmp, bcmp)
359 libc_hidden_builtin_def (memcmp)