1 /* Copyright (C) 2011-2022 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
/* Build-variant macros. Two alternative sets are visible below.
   NOTE(review): the conditional that selects between them is not visible
   in this excerpt -- presumably USE_AS_WMEMCHR, which is tested further
   down in the file; confirm. */
/* Wide-character set: the routine is named wmemchr, lanes are compared
   with pcmpeqd (32-bit lanes), i.e. 4 characters per 16-byte vector. */
21 # define MEMCHR wmemchr
22 # define PCMPEQ pcmpeqd
23 # define CHAR_PER_VEC 4
/* Byte set: the routine is named memchr, lanes are compared with
   pcmpeqb (8-bit lanes), i.e. 16 characters per 16-byte vector. */
25 # define MEMCHR memchr
26 # define PCMPEQ pcmpeqb
27 # define CHAR_PER_VEC 16
/* Excerpt of the MEMCHR routine body. Only some of its lines are visible
   here: the entry point, most labels and the flag-setting instructions
   are missing. What the visible lines show:
     - xmm1 is filled by self-interleave plus a dword broadcast, and is
       also used as the destination of PCMPEQ against memory;
     - vectors are read from rdi at offsets 16/32/48 with aligned loads;
     - rdx/edx is adjusted in multiples of CHAR_PER_VEC;
     - rax is rewritten by lea as rdi + rax + constant.
   NOTE(review): presumably rdi is the buffer pointer, rdx the remaining
   length in characters and rax the return value -- confirm against the
   full file. */
30 /* Fast SSE2 version using pmaxub and a 64-byte loop. */
38 /* Clear the upper 32 bits. */
/* punpcklbw with the same register as source and destination interleaves
   the low bytes of xmm1 with themselves, duplicating each low byte. */
45 punpcklbw %xmm1, %xmm1
/* Second self-interleave. NOTE(review): the lines between the two are not
   visible; if xmm1 is untouched in between, its lowest byte now fills the
   low dword. */
48 punpcklbw %xmm1, %xmm1
/* Copy dword 0 of xmm1 into all four dword lanes. */
52 pshufd $0, %xmm1, %xmm1
/* Subtract one vector's worth of characters from rdx. */
63 sub $CHAR_PER_VEC, %rdx
/* Subtract four vectors' worth of characters from rdx. */
72 sub $(CHAR_PER_VEC * 4), %rdx
83 /* Check if there is a match. */
85 /* Remove the leading bytes. */
/* Taken when ZF is set; the instruction that sets the flags is not
   visible in this excerpt. */
88 je L(unaligned_no_match)
89 /* Check which byte is a match. */
104 L(unaligned_no_match):
105 /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
106 "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to avoid
107 possible addition overflow. */
/* wmemchr-only section; its body and closing directive are not visible
   in this excerpt. */
110 #ifdef USE_AS_WMEMCHR
/* Subtract four vectors' worth of characters from rdx. */
116 sub $(CHAR_PER_VEC * 4), %rdx
/* Aligned 16-byte loads from rdi+16, rdi+32 and rdi+48. movdqa requires a
   16-byte-aligned memory operand, so rdi must be 16-byte aligned here. */
127 movdqa 16(%rdi), %xmm2
133 movdqa 32(%rdi), %xmm3
139 movdqa 48(%rdi), %xmm4
/* Subtract four vectors' worth of characters from rdx. */
149 sub $(CHAR_PER_VEC * 4), %rdx
/* Same aligned load pattern as the previous group. */
158 movdqa 16(%rdi), %xmm2
164 movdqa 32(%rdi), %xmm3
/* The +48 vector is loaded into xmm3 here, whereas the other load groups
   visible in this excerpt use xmm4 for it. NOTE(review): the consuming
   instructions are not visible; confirm the register reuse is intended. */
170 movdqa 48(%rdi), %xmm3
/* wmemchr-only section; body not visible in this excerpt. */
181 #ifdef USE_AS_WMEMCHR
/* Subtract four vectors' worth of characters from rdx. */
188 sub $(CHAR_PER_VEC * 4), %rdx
/* Aligned loads of the vectors at rdi+16, rdi+32 and rdi+48. */
191 movdqa 16(%rdi), %xmm2
192 movdqa 32(%rdi), %xmm3
193 movdqa 48(%rdi), %xmm4
220 movdqa 32(%rdi), %xmm3
/* Lane-wise equality compare of the vector at rdi+48 against xmm1. The
   result mask is written to xmm1, overwriting its previous contents. */
223 PCMPEQ 48(%rdi), %xmm1
/* rax = rdi + rax + 48. NOTE(review): presumably rax holds the byte offset
   of the match inside the vector at rdi+48 -- confirm. */
230 lea 48(%rdi, %rax), %rax
/* Add two vectors' worth of characters to edx (32-bit operation, so the
   upper half of rdx is zeroed). */
235 add $(CHAR_PER_VEC * 2), %edx
244 movdqa 16(%rdi), %xmm2
250 movdqa 32(%rdi), %xmm3
/* Subtract one vector's worth of characters from edx. */
255 sub $CHAR_PER_VEC, %edx
/* Compare the vector at rdi+48 against xmm1; the mask replaces xmm1. */
258 PCMPEQ 48(%rdi), %xmm1
/* Add two vectors' worth of characters to edx. */
267 add $(CHAR_PER_VEC * 2), %edx
/* Subtract one vector's worth of characters from edx. */
273 sub $CHAR_PER_VEC, %edx
/* Compare the vector at rdi+16 against xmm1; the mask replaces xmm1. */
276 PCMPEQ 16(%rdi), %xmm1
/* The lea instructions below each compute rax = rdi + rax + constant,
   with constants -16, 16, 32 and 48. NOTE(review): the labels they belong
   to are not visible; presumably each turns an in-vector offset into the
   address of the match -- confirm. */
286 lea -16(%rax, %rdi), %rax
298 lea 16(%rax, %rdi), %rax
304 lea 32(%rax, %rdi), %rax
/* wmemchr-only sections follow; their bodies are not visible here. */
310 #ifdef USE_AS_WMEMCHR
324 #ifdef USE_AS_WMEMCHR
332 lea 16(%rdi, %rax), %rax
338 #ifdef USE_AS_WMEMCHR
346 lea 32(%rdi, %rax), %rax
352 #ifdef USE_AS_WMEMCHR
360 lea 48(%rdi, %rax), %rax
/* Emitted only when USE_AS_WMEMCHR is not defined. strong_alias and
   libc_hidden_builtin_def are macros defined outside this excerpt.
   NOTE(review): presumably these make __memchr an alias of memchr and
   emit the hidden builtin definition -- confirm. The matching closing
   directive is not visible here. */
369 #ifndef USE_AS_WMEMCHR
370 strong_alias (memchr, __memchr)
371 libc_hidden_builtin_def(memchr)