1 /* SSE2 version of strlen/wcslen.
2 Copyright (C) 2012-2017 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
23 # define PCMPEQ pcmpeqd
24 # define SHIFT_RETURN shrq $2, %rax
27 # define PCMPEQ pcmpeqb
31 /* Long lived registers in strlen(s), strnlen(s, n) are:
35 %r10 (s+n) & (~(64-1))
43 /* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */
45 PCMPEQ (%rax), %xmm0; \
46 PCMPEQ 16(%rax), %xmm1; \
47 PCMPEQ 32(%rax), %xmm2; \
48 PCMPEQ 48(%rax), %xmm3; \
49 pmovmskb %xmm0, %esi; \
50 pmovmskb %xmm1, %edx; \
51 pmovmskb %xmm2, %r8d; \
52 pmovmskb %xmm3, %ecx; \
61 /* Do not read anything when n==0. */
71 /* Initialize long lived registers. */
86 /* Offsets 4032-4047 will be aligned down to 4032 and thus fit into the page. */
88 /* We cannot unify this branching as it would be ~6 cycles slower. */
92 /* Test if end is among first 64 bytes. */
93 # define STRNLEN_PROLOG \
100 # define STRNLEN_PROLOG andq $-64, %rax;
103 /* Ignore bits in mask that come before start of string. */
104 #define PROLOG(lab) \
119 /* Test first 16 bytes unaligned. */
125 bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */
130 /* Same as FIND_ZERO except we do not check first 16 bytes. */
132 PCMPEQ 16(%rax), %xmm1
133 PCMPEQ 32(%rax), %xmm2
134 PCMPEQ 48(%rax), %xmm3
145 /* When no zero byte is found xmm1-3 are zero so we do not have to
156 /* We must do this check to correctly handle strnlen (s, -1). */
180 PMINU 16(%rax), %xmm0
181 PMINU 32(%rax), %xmm0
182 PMINU 48(%rax), %xmm0
192 je L(first) /* Do not read when end is at page boundary. */
217 /* Main loop. Unrolled twice to improve L2 cache performance on core2. */
221 movdqa 64(%rax), %xmm0
222 PMINU 80(%rax), %xmm0
223 PMINU 96(%rax), %xmm0
224 PMINU 112(%rax), %xmm0
233 PMINU 16(%rax), %xmm0
234 PMINU 32(%rax), %xmm0
235 PMINU 48(%rax), %xmm0
258 libc_hidden_builtin_def (strlen)