1 /* strcmp with unaligned loads
2 Copyright (C) 2013-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <isa-level.h>
21 /* Continue building as ISA level 2. We use this as ISA V2 default
22 because strcmp-sse42 uses pcmpstri (slow on some SSE4.2
23 processors) and this implementation is potentially faster than
24 strcmp-sse42 (aside from the slower page cross case). */
25 #if ISA_SHOULD_BUILD (2)
27 # define STRCMP_ISA _sse2_unaligned
28 # include "strcmp-naming.h"
/* Fetch one byte from each of the two buffers addressed by %rdi and
   %rsi, at index %rdx, zero-extended to 32 bits.  The second load
   writes %edx and so destroys the index it was addressed with; it
   therefore has to be the last use of %rdx as an index.  */
51 movzbl (%rdi, %rdx), %eax
52 movzbl (%rsi, %rdx), %edx
/* 16-byte loads at byte offsets 16, 32 and 48 from %rdi and from %rsi.
   All of them use movdqu, which places no alignment requirement on
   the address.  Register pairing per offset (%rdi side / %rsi side):
   16 -> %xmm6 / %xmm3, 32 -> %xmm5 / %xmm2, 48 -> %xmm4 / %xmm0.  */
58 movdqu 16(%rdi), %xmm6
59 movdqu 16(%rsi), %xmm3
60 movdqu 32(%rdi), %xmm5
62 movdqu 32(%rsi), %xmm2
65 movdqu 48(%rdi), %xmm4
68 movdqu 48(%rsi), %xmm0
/* %rax = %rdi + %rdx.  lea only computes the address: no memory is
   read and the flags are left untouched.  */
89 leaq (%rdi, %rdx), %rax
/* Taken when ZF is set.  NOTE(review): the instruction that sets the
   flags for this branch is not among the adjacent lines -- confirm
   against the full routine.  */
105 je L(loop_cross_page)
/* 16-byte loads at offsets 16, 32 and 48 from two pointers.  %rdx is
   read with movdqu (any alignment is accepted); %rax is read with
   movdqa, which faults unless the effective address is 16-byte
   aligned, so %rax must be 16-byte aligned here.
   %xmm3 is loaded from 16(%rax) and then again from 48(%rax).
   NOTE(review): the first value has to be consumed before the
   reload; the consuming instruction is not visible here -- confirm.  */
108 movdqu 16(%rdx), %xmm1
110 movdqa 16(%rax), %xmm3
112 movdqu 32(%rdx), %xmm5
115 movdqu 48(%rdx), %xmm6
117 movdqa 32(%rax), %xmm2
119 movdqa 48(%rax), %xmm3
/* Fetch one zero-extended byte at index %rcx from each of the bases
   %rax and %rdx.  Both destinations alias their own base register
   (%eax / %rax and %edx / %rdx), so neither pointer survives this
   pair of loads.  */
149 movzbl (%rax, %rcx), %eax
150 movzbl (%rdx, %rcx), %edx
/* 16-byte loads at offsets 0, 16, 32 and 48 from (%rdx + %r10) and
   (%rax + %r10).  The alignment roles are swapped relative to the
   previous group of vector loads: here the %rdx side uses movdqa
   (effective address must be 16-byte aligned) and the %rax side uses
   movdqu (no alignment requirement).
   NOTE(review): %r10 is presumably an offset chosen so that
   %rdx + %r10 is 16-byte aligned; its computation is not visible
   here -- confirm.  */
161 movdqa (%rdx, %r10), %xmm0
162 movdqa 16(%rdx, %r10), %xmm1
163 movdqu (%rax, %r10), %xmm2
164 movdqu 16(%rax, %r10), %xmm3
166 movdqa 32(%rdx, %r10), %xmm5
169 movdqa 48(%rdx, %r10), %xmm6
/* %xmm2 and %xmm3 are reloaded with the chunks at offsets 32 and 48.
   NOTE(review): as above, the values loaded at offsets 0 and 16 must
   already have been consumed -- confirm against the full routine.  */
171 movdqu 32(%rax, %r10), %xmm2
172 movdqu 48(%rax, %r10), %xmm3
/* Same byte fetch as the earlier (%rax, %rcx) / (%rdx, %rcx) pair:
   one zero-extended byte from each base at index %rcx, overwriting
   both base registers with the loaded values.  */
199 movzbl (%rax, %rcx), %eax
200 movzbl (%rdx, %rcx), %edx
/* Taken when ZF is set.  NOTE(review): the compare that sets the
   flags is not visible next to this branch -- confirm.  */
210 je L(main_loop_header)
/* Fetch one zero-extended byte at index %rdx from each of %rdi and
   %rsi.  The results go to %eax and %ecx, so the index in %rdx and
   both base pointers are preserved.  */
212 movzbl (%rdi, %rdx), %eax
213 movzbl (%rsi, %rdx), %ecx
/* Taken when ZF is clear.  movzbl does not modify the flags, so the
   condition comes from an instruction that is not shown here.
   NOTE(review): confirm which test feeds this branch.  */
215 jne L(cross_page_loop)