1 /* strstr with unaligned loads
2 Copyright (C) 2009-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
20 #include "../strchr-isa-default-impl.h"
/* __strstr_sse2_unaligned: substring search written with SSE2
   instructions and unaligned 16-byte loads (per the file header).

   NOTE(review): only part of the routine is visible here.  The label
   definitions, the argument checks, the mask handling and the return
   paths are not shown, so the comments below state only what each
   visible instruction does and mark everything else as an assumption.

   Presumably %rdi = haystack and %rsi = needle (System V AMD64
   argument order for strstr) -- TODO confirm against the full file.  */
22 ENTRY(__strstr_sse2_unaligned)
/* Broadcast the low byte of %xmm1 and of %xmm2 to all 16 byte lanes:
   punpcklbw doubles the low bytes, punpcklwd doubles the low words and
   pshufd with selector 0 replicates the low dword.
   Presumably these low bytes are the first two needle characters; the
   instructions that load them are not visible here -- TODO confirm.  */
33 punpcklbw %xmm1, %xmm1
35 punpcklbw %xmm2, %xmm2
36 punpcklwd %xmm1, %xmm1
37 punpcklwd %xmm2, %xmm2
38 pshufd $0, %xmm1, %xmm1
39 pshufd $0, %xmm2, %xmm2
/* Two unaligned 16-byte windows that start one byte apart (offsets 16
   and 17 from %rdi).  Presumably each is compared against one of the
   broadcast bytes so that a two-byte match can be detected per
   position -- the compares are not visible here.  */
47 movdqu 16(%rdi), %xmm0
51 movdqu 17(%rdi), %xmm3
/* Jump to a label that is defined outside the visible lines.  */
74 jmp L(pair_loop_start)
/* Compare %cl with the byte at %rax + %rdx + 2.  */
84 cmpb 2(%rax,%rdx), %cl
/* Load the byte at %rsi + %rdx + 3, zero-extended into %ecx.
   Together with the compare above this looks like a byte-by-byte
   check of a candidate match with %rdx as the running index; the
   index update and the terminator test are not visible -- confirm.  */
87 movzbl 3(%rsi,%rdx), %ecx
/* Taken when the preceding flag-setting instruction (not visible
   here) left ZF clear.  */
100 jne L(next_pair_index)
/* Same window pattern as above for the following bytes: unaligned
   loads at offsets 32/33 and 48/49 from %rdi.  */
104 movdqu 32(%rdi), %xmm3
106 movdqu 33(%rdi), %xmm4
110 movdqu 48(%rdi), %xmm0
114 movdqu 49(%rdi), %xmm3
/* Jump to a label that is defined outside the visible lines.  */
138 jmp L(pair_loop2_start)
/* Same compare/load pair as in the first verification sequence:
   byte at %rax + %rdx + 2 against %cl, then the byte at
   %rsi + %rdx + 3 into %ecx.  */
143 cmpb 2(%rax,%rdx), %cl
146 movzbl 3(%rsi,%rdx), %ecx
/* Taken when the preceding flag-setting instruction (not visible
   here) left ZF clear.  */
162 jne L(next_pair2_index)
/* From here on the code mixes movdqa with movdqu.  movdqa faults on a
   memory operand that is not 16-byte aligned, so %rdi must be 16-byte
   aligned by the time these run; the instruction that aligns it is
   not visible here.  Each aligned load at offset N is accompanied by
   an unaligned load at offset N - 1, i.e. the window one byte
   earlier.  */
172 movdqa 64(%rdi), %xmm3
173 movdqu 63(%rdi), %xmm6
177 movdqa 80(%rdi), %xmm10
180 movdqu 79(%rdi), %xmm3
/* Aligned loads at offsets 96 and 112; no matching unaligned load for
   these offsets is visible in this excerpt.  */
183 movdqa 96(%rdi), %xmm9
187 movdqa 112(%rdi), %xmm8
/* Unaligned windows starting one byte before offsets 32 and 48.  */
190 movdqu 31(%rdi), %xmm4
196 movdqu 47(%rdi), %xmm5
/* Byte-wise unsigned minimum of the register with the 16 bytes at the
   given address.  The memory form of pminub also requires a 16-byte
   aligned address.
   NOTE(review): whether %xmm4/%xmm5 still hold the raw loads from
   offsets 31/47 at this point depends on instructions that are not
   visible here.  */
206 pminub 32(%rdi),%xmm4
207 pminub 48(%rdi),%xmm5
/* Aligned window at offset 16 and the unaligned window one byte
   before it.  */
211 movdqa 16(%rdi), %xmm8
213 movdqu 15(%rdi), %xmm0
/* Jump to a label that is defined outside the visible lines.  */
241 jmp L(pair_loop_start3)
/* Compare %dl with the byte at %rcx + %rax + 1.  Note the register
   roles and the displacement differ from the earlier sequences (which
   used %rax as base, %rdx as index and displacement 2).
   NOTE(review): presumably the candidate pointer is biased by one byte
   here because of the N - 1 windows above -- confirm.  */
246 cmpb 1(%rcx,%rax), %dl
/* Load the byte at %rsi + %rax + 3, zero-extended into %edx.  */
249 movzbl 3(%rsi,%rax), %edx
/* Taken when the preceding flag-setting instruction (not visible
   here) left ZF clear.  */
265 jne L(next_pair_index3)
/* Loads based on %rax instead of %rdi.  The movdqa forms require %rax
   to be 16-byte aligned.  Unaligned windows at offsets -1, 15, 31 and
   47 are each one byte before a 16-byte boundary relative to %rax;
   aligned loads at 16, 32 and 48 are visible (a load at offset 0 is
   not visible in this excerpt).  %xmm4 and %xmm5 are reloaded for
   every 16-byte step, so they must be consumed in between by
   instructions that are not shown.  */
280 movdqu -1(%rax), %xmm4
282 movdqa 16(%rax), %xmm5
288 movdqu 15(%rax), %xmm4
292 movdqa 32(%rax), %xmm5
298 movdqu 31(%rax), %xmm4
302 movdqa 48(%rax), %xmm5
308 movdqu 47(%rax), %xmm4
/* Collect the most significant bit of each of the 16 bytes of %xmm6
   into the low 16 bits of %r10d.  */
314 pmovmskb %xmm6, %r10d
/* Jump to a label that is defined outside the visible lines.  */
342 jmp L(pair_loop_start4)
/* Compare %cl with the byte at %rax + %rdx + 1, then load the byte at
   %rsi + %rdx + 3 into %ecx: same shape as the first verification
   sequence but with displacement 1 instead of 2 on the compare.  */
347 cmpb 1(%rax,%rdx), %cl
350 movzbl 3(%rsi,%rdx), %ecx
/* Taken when the preceding flag-setting instruction (not visible
   here) left ZF clear.  */
361 jne L(next_pair_index4)
/* Closes the definition opened by ENTRY above; both macros are
   defined outside this file excerpt.  */
375 END(__strstr_sse2_unaligned)