1 /* strstr with unaligned loads
2 Copyright (C) 2009-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
/* __strstr_sse2_unaligned: strstr variant built on SSE2 vector loads, mixing
   unaligned (movdqu) and aligned (movdqa) accesses.
   Only part of the instruction stream is visible in this view: none of the
   L(...) branch targets used below are defined here, and the instructions
   that set the flags tested by the jne branches are not shown either.
   NOTE(review): inferred from the strstr name and the System V AMD64
   argument order, not from the visible code: %rdi = haystack,
   %rsi = needle - TODO confirm.  */
21 ENTRY(__strstr_sse2_unaligned)
/* Byte broadcast.  Applying punpcklbw, punpcklwd and pshufd $0 to a register
   with itself copies that register's lowest byte into all 16 byte lanes.
   The same three steps are interleaved for %xmm1 and %xmm2.
   NOTE(review): presumably these hold the first two needle bytes; the
   instructions that fill them are not visible here - confirm.  */
32 punpcklbw %xmm1, %xmm1
34 punpcklbw %xmm2, %xmm2
35 punpcklwd %xmm1, %xmm1
36 punpcklwd %xmm2, %xmm2
37 pshufd $0, %xmm1, %xmm1
38 pshufd $0, %xmm2, %xmm2
/* Two overlapping unaligned 16-byte loads taken one byte apart, at offsets
   16 and 17 from %rdi.
   NOTE(review): presumably so that byte i and byte i+1 land in the same
   lane of the two registers; the compare code is not shown - confirm.  */
46 movdqu 16(%rdi), %xmm0
50 movdqu 17(%rdi), %xmm3
/* Branch target is not defined within the lines shown here.  */
73 jmp L(pair_loop_start)
/* Compare the byte in %cl with the memory byte at %rax + %rdx + 2.  */
83 cmpb 2(%rax,%rdx), %cl
/* Zero-extending load of the byte at %rsi + %rdx + 3 into %ecx.
   NOTE(review): looks like the next byte to be compared - confirm.  */
86 movzbl 3(%rsi,%rdx), %ecx
/* Taken when ZF is clear; the instruction that sets the flags for this
   branch is not visible here.  */
99 jne L(next_pair_index)
/* Same one-byte-apart pairing of unaligned loads for the next two blocks,
   at offsets 32/33 and 48/49 from %rdi.  %xmm3 is written twice, so its
   first value must be consumed by instructions not shown in between.  */
103 movdqu 32(%rdi), %xmm3
105 movdqu 33(%rdi), %xmm4
109 movdqu 48(%rdi), %xmm0
113 movdqu 49(%rdi), %xmm3
137 jmp L(pair_loop2_start)
/* Same byte compare and zero-extending byte load as in the first loop
   fragment above, with identical registers and offsets.  */
142 cmpb 2(%rax,%rdx), %cl
145 movzbl 3(%rsi,%rdx), %ecx
161 jne L(next_pair2_index)
/* From here the block loads use movdqa, which faults unless its address is
   16-byte aligned, so %rdi has to be 16-byte aligned at this point (the
   step that aligns it is not visible here).  Where a partner load is
   visible, the aligned block at offset k is paired with an unaligned load
   at offset k - 1 (64/63 and 80/79), the opposite direction from the k and
   k + 1 pairing used above.  %xmm3 is again written twice.  */
171 movdqa 64(%rdi), %xmm3
172 movdqu 63(%rdi), %xmm6
176 movdqa 80(%rdi), %xmm10
179 movdqu 79(%rdi), %xmm3
/* Aligned loads of the blocks at offsets 96 and 112; no matching k - 1
   loads for these two blocks appear in the visible lines.  */
182 movdqa 96(%rdi), %xmm9
186 movdqa 112(%rdi), %xmm8
/* Unaligned loads at offsets 31 and 47, one byte before the aligned blocks
   at 32 and 48 that the pminub instructions below read from memory.  */
189 movdqu 31(%rdi), %xmm4
195 movdqu 47(%rdi), %xmm5
/* pminub keeps, per byte lane, the unsigned minimum of the register and the
   16 bytes at the memory operand.  As a non-VEX SSE instruction it requires
   that memory operand to be 16-byte aligned, consistent with the movdqa
   loads above.  */
205 pminub 32(%rdi),%xmm4
206 pminub 48(%rdi),%xmm5
/* Aligned block at offset 16 with its k - 1 partner at offset 15.  This
   overwrites the %xmm8 value loaded from offset 112 above.  */
210 movdqa 16(%rdi), %xmm8
212 movdqu 15(%rdi), %xmm0
240 jmp L(pair_loop_start3)
/* This loop fragment uses different registers and a different offset than
   the earlier two: it compares %dl with the byte at %rcx + %rax + 1, then
   loads the byte at %rsi + %rax + 3 zero-extended into %edx.  */
245 cmpb 1(%rcx,%rax), %dl
248 movzbl 3(%rsi,%rax), %edx
264 jne L(next_pair_index3)
/* This part addresses the blocks through %rax instead of %rdi.  The movdqa
   loads at offsets 16, 32 and 48 require %rax to be 16-byte aligned.  Each
   aligned block is preceded or followed by an unaligned load one byte
   earlier (-1, 15, 31, 47).  %xmm4 and %xmm5 are reloaded for every block,
   so their consumers sit in the lines not shown here.  */
279 movdqu -1(%rax), %xmm4
281 movdqa 16(%rax), %xmm5
287 movdqu 15(%rax), %xmm4
291 movdqa 32(%rax), %xmm5
297 movdqu 31(%rax), %xmm4
301 movdqa 48(%rax), %xmm5
307 movdqu 47(%rax), %xmm4
/* Gather the most significant bit of each of the 16 bytes of %xmm6 into the
   low 16 bits of %r10d.  */
313 pmovmskb %xmm6, %r10d
341 jmp L(pair_loop_start4)
/* Compare %cl with the byte at %rax + %rdx + 1 (offset 1 here, where the
   first two loop fragments use offset 2), then load the byte at
   %rsi + %rdx + 3 zero-extended into %ecx.  */
346 cmpb 1(%rax,%rdx), %cl
349 movzbl 3(%rsi,%rdx), %ecx
360 jne L(next_pair_index4)
374 END(__strstr_sse2_unaligned)