1 /* Fast SSE2 memrchr with a 64-byte loop, using the pmaxub instruction.
3 Copyright (C) 2011-2022 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
/* NOTE(review): only some instructions of this routine are visible in
   this excerpt; whatever sits between the lines below (compares, mask
   extraction, branches, labels) is not shown, so the comments describe
   the visible instructions only.  */

/* Interleaving %xmm1 with itself twice leaves four copies of its low
   byte in the low dword; pshufd $0 then replicates that dword, so all
   16 byte lanes hold the original low byte -- assuming no elided
   instruction modifies %xmm1 in between (TODO confirm).  */
29 punpcklbw %xmm1, %xmm1
30 punpcklbw %xmm1, %xmm1
33 pshufd $0, %xmm1, %xmm1
38 /* Check if there is a match. */
/* Aligned 16-byte loads at %rdi+48, +32, +16, highest offset first.
   movdqa faults on an address that is not 16-byte aligned, so %rdi
   must be 16-byte aligned at each of these points.  */
58 movdqa 48(%rdi), %xmm0
64 movdqa 32(%rdi), %xmm2
70 movdqa 16(%rdi), %xmm3
/* Same three aligned loads again; presumably a second unrolled step
   after %rdi has been adjusted -- verify against the full source.  */
86 movdqa 48(%rdi), %xmm0
92 movdqa 32(%rdi), %xmm2
98 movdqa 16(%rdi), %xmm3
/* Aligned loads of the vectors at %rdi+16, +32 and +48 into three
   distinct registers, lowest offset first.  */
126 movdqa 16(%rdi), %xmm2
127 movdqa 32(%rdi), %xmm3
128 movdqa 48(%rdi), %xmm4
151 movdqa 16(%rdi), %xmm2
/* Byte-wise equality of %xmm1 against the 16 bytes at (%rdi).  The
   result mask (0xff per equal byte, 0 otherwise) overwrites %xmm1, and
   the memory operand of this SSE2 form must be 16-byte aligned.  */
154 pcmpeqb (%rdi), %xmm1
172 movdqa 48(%rdi), %xmm0
178 movdqa 32(%rdi), %xmm2
184 movdqa 16(%rdi), %xmm3
192 pcmpeqb (%rdi), %xmm1
201 movdqa 48(%rdi), %xmm0
/* As above, but comparing against the 16 bytes at %rdi+32.  */
209 pcmpeqb 32(%rdi), %xmm1
/* %rax = %rax + %rdi + 16 / 32 / 48.  Presumably %rax holds a byte
   index inside the matching vector and %rdi its base, which would make
   the result the address of the match -- verify.  */
225 lea 16(%rax, %rdi), %rax
231 lea 32(%rax, %rdi), %rax
237 lea 48(%rax, %rdi), %rax
/* Same sums with base and index swapped; the computed value is again
   %rdi + %rax + 16 / 32 / 48.  */
255 lea 16(%rdi, %rax), %rax
264 lea 32(%rdi, %rax), %rax
273 lea 48(%rdi, %rax), %rax
/* Target of the jz further down in this fragment.  L() is a label
   macro defined outside this excerpt.  */
282 L(length_less16_offset0):
/* Byte-wise compare of %xmm1 with the 16 bytes at (%rdi); the 0xff/0
   mask overwrites %xmm1.  The memory operand must be 16-byte aligned.  */
287 pcmpeqb (%rdi), %xmm1
/* NOTE(review): the lines below appear to belong to a different code
   path whose label is not visible in this excerpt.  */
/* Replicate the low byte of %xmm1 into all 16 byte lanes (two
   self-interleaves, then a dword broadcast) -- assuming no elided
   instruction touches %xmm1 in between.  */
305 punpcklbw %xmm1, %xmm1
306 punpcklbw %xmm1, %xmm1
310 pshufd $0, %xmm1, %xmm1
/* Taken when ZF is set; the instruction that sets the flags is not
   shown here.  */
314 jz L(length_less16_offset0)
/* Taken on unsigned "above" (CF = 0 and ZF = 0); the compare that
   sets the flags is not shown here.  */
322 ja L(length_less16_part2)
324 pcmpeqb (%rdi), %xmm1
/* Reached through the ja above.  Only part of this path is visible in
   this excerpt.  */
344 L(length_less16_part2):
/* Aligned 16-byte load of the vector at %rdi+16; %rdi must be 16-byte
   aligned here because movdqa faults otherwise.  */
345 movdqa 16(%rdi), %xmm2
/* Taken when ZF is clear; the flag-setting instruction is not shown.
   Presumably this tests a match mask for the vector loaded above --
   verify against the full source.  */
357 jnz L(length_less16_part2_return)
/* Byte-wise compare of %xmm1 with the 16 bytes at (%rdi); the mask
   overwrites %xmm1.  */
359 pcmpeqb (%rdi), %xmm1
/* Target of the jnz in L(length_less16_part2).  */
373 L(length_less16_part2_return):
/* %rax = %rax + %rdi + 16.  Presumably %rax is a byte index within the
   vector at %rdi+16, making the result the address of the match --
   verify; the instruction that sets %rax is not visible here.  */
375 lea 16(%rax, %rdi), %rax
/* weak_alias is a macro defined outside this excerpt; presumably it
   makes memrchr a weak alias of __memrchr -- confirm against its
   definition.  */
379 weak_alias (__memrchr, memrchr)