1 /* Placeholder function, not used by any processor at the moment.
2 Copyright (C) 2022-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
20 #define STRNLEN __strnlen_evex512
23 #include "x86-evex512-vecs.h"
24 #include "reg-macros.h"
26 #include <isa-level.h>
28 #if ISA_SHOULD_BUILD (4)
33 # define VPCMPEQ vpcmpeqd
34 # define VPTESTN vptestnmd
35 # define VPMINU vpminud
38 # define VPCMPEQ vpcmpeqb
39 # define VPTESTN vptestnmb
40 # define VPMINU vpminub
44 # define PAGE_SIZE 4096
45 # define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
47 .section SECTION(.text),"ax",@progbits
48 /* Aligning the entry point to 64 bytes provides better performance
49    for strings of one vector length. */
50 ENTRY_P2ALIGN (STRNLEN, 6)
51 /* Check zero length. */
55 /* Clear the upper 32 bits. */
60 vpxorq %VMM_128(0), %VMM_128(0), %VMM_128(0)
62 cmpl $((PAGE_SIZE - VEC_SIZE) << 20), %eax
65 /* Compare [w]char for null, mask bit will be set for match. */
66 VPCMPEQ (%rdi), %VMM(0), %k0
68 /* Store max length in rax. */
70 /* If rcx is 0, rax will have the max length. We cannot use VRCX
71    and VRAX here for evex256 because the upper 32 bits of ecx and
72    eax may be undefined. */
74 cmp $CHAR_PER_VEC, %rax
80 /* At this point vector max length reached. */
88 /* Align rax to VEC_SIZE. */
95 /* At this point rdx contains [w]chars already compared. */
96 leaq -CHAR_PER_VEC(%rsi, %rdx), %rdx
97 /* At this point rdx contains the number of [w]chars left to
98    compare. From here on rdx keeps decrementing with each compare. */
100 /* Loop unroll 4 times for 4 vector loop. */
101 VPCMPEQ VEC_SIZE(%rax), %VMM(0), %k0
102 subq $-VEC_SIZE, %rax
107 subq $CHAR_PER_VEC, %rdx
110 VPCMPEQ VEC_SIZE(%rax), %VMM(0), %k0
115 subq $CHAR_PER_VEC, %rdx
118 VPCMPEQ (VEC_SIZE * 2)(%rax), %VMM(0), %k0
123 subq $CHAR_PER_VEC, %rdx
126 VPCMPEQ (VEC_SIZE * 3)(%rax), %VMM(0), %k0
131 subq $CHAR_PER_VEC, %rdx
133 /* Save pointer before 4 x VEC_SIZE alignment. */
136 /* Align address to VEC_SIZE * 4 for loop. */
137 andq $-(VEC_SIZE * 4), %rax
140 # ifdef USE_AS_WCSLEN
143 /* rcx contains the number of [w]chars that will be recompared
144    due to alignment fixes. rdx must be incremented by rcx to
145    offset the alignment adjustment. */
147 /* Need jump as we don't want to add/subtract rdx for first
148 iteration of 4 x VEC_SIZE aligned loop. */
152 /* VPMINU and VPCMP combination provide better performance as
153 compared to alternative combinations. */
154 VMOVA (VEC_SIZE * 4)(%rax), %VMM(1)
155 VPMINU (VEC_SIZE * 5)(%rax), %VMM(1), %VMM(2)
156 VMOVA (VEC_SIZE * 6)(%rax), %VMM(3)
157 VPMINU (VEC_SIZE * 7)(%rax), %VMM(3), %VMM(4)
159 VPTESTN %VMM(2), %VMM(2), %k0
160 VPTESTN %VMM(4), %VMM(4), %k1
162 subq $-(VEC_SIZE * 4), %rax
166 subq $(CHAR_PER_VEC * 4), %rdx
173 VPTESTN %VMM(1), %VMM(1), %k2
179 /* At this point, if k0 is non zero, null char must be in the
184 VPTESTN %VMM(3), %VMM(3), %k3
188 /* At this point null [w]char must be in the fourth vector so no
192 /* The fourth, third and second vector terminating cases are
193    pretty much the same; implemented this way to avoid branching
194    and to reuse code from the pre-loop exit condition. */
198 # ifdef USE_AS_WCSLEN
199 subq $-(VEC_SIZE * 3), %rax
203 leaq (VEC_SIZE * 3)(%rcx, %rax), %rax
213 # ifdef USE_AS_WCSLEN
214 subq $-(VEC_SIZE * 2), %rax
218 leaq (VEC_SIZE * 2)(%rcx, %rax), %rax
225 subq $-VEC_SIZE, %rax
229 # ifdef USE_AS_WCSLEN
240 andl $(VEC_SIZE - 1), %ecx
241 # ifdef USE_AS_WCSLEN
244 /* ecx contains the number of [w]chars to be skipped as a result
245    of address alignment. */
246 andq $-VEC_SIZE, %rax
247 VPCMPEQ (%rax), %VMM(0), %k0
249 /* Ignore the number of characters for alignment adjustment. */
251 jnz L(page_cross_end)
252 movl $CHAR_PER_VEC, %eax