1 /* Placeholder function, not used by any processor at the moment.
2 Copyright (C) 2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 /* UNUSED. Exists purely as reference implementation. */
21 #include <isa-level.h>
23 #if ISA_SHOULD_BUILD (4)
28 # define VPCMPEQ vpcmpeqd
29 # define VPTESTN vptestnmd
30 # define VPMINU vpminud
33 # define VPCMPEQ vpcmpeqb
34 # define VPTESTN vptestnmb
35 # define VPMINU vpminub
39 # define PAGE_SIZE 4096
40 # define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
42 .section SECTION(.text),"ax",@progbits
43 /* Aligning the entry point to a 64-byte boundary provides better
44 performance for strings of one vector length. */
45 ENTRY_P2ALIGN (STRLEN, 6)
46 # ifdef USE_AS_STRNLEN
47 /* Check zero length. */
51 /* Clear the upper 32 bits. */
57 vpxorq %VMM_128(0), %VMM_128(0), %VMM_128(0)
59 cmpl $((PAGE_SIZE - VEC_SIZE) << 20), %eax
62 /* Compare each [w]char against null; the mask bit is set for a match. */
63 VPCMPEQ (%rdi), %VMM(0), %k0
64 # ifdef USE_AS_STRNLEN
66 /* Store max length in rax. */
68 /* If rcx is 0, rax will have the max length. We cannot use VRCX
69 and VRAX here for evex256 because the upper 32 bits may be
70 undefined for ecx and eax. */
72 cmp $CHAR_PER_VEC, %rax
84 /* At this point the vector max length has been reached. */
85 # ifdef USE_AS_STRNLEN
94 /* Align rax to VEC_SIZE. */
96 # ifdef USE_AS_STRNLEN
102 /* At this point rdx contains [w]chars already compared. */
103 leaq -CHAR_PER_VEC(%rsi, %rdx), %rdx
104 /* At this point rdx contains the number of [w]chars left to process.
105 From now on rdx keeps decrementing with each compare. */
108 /* Loop unroll 4 times for 4 vector loop. */
109 VPCMPEQ VEC_SIZE(%rax), %VMM(0), %k0
110 subq $-VEC_SIZE, %rax
115 # ifdef USE_AS_STRNLEN
116 subq $CHAR_PER_VEC, %rdx
120 VPCMPEQ VEC_SIZE(%rax), %VMM(0), %k0
125 # ifdef USE_AS_STRNLEN
126 subq $CHAR_PER_VEC, %rdx
130 VPCMPEQ (VEC_SIZE * 2)(%rax), %VMM(0), %k0
135 # ifdef USE_AS_STRNLEN
136 subq $CHAR_PER_VEC, %rdx
140 VPCMPEQ (VEC_SIZE * 3)(%rax), %VMM(0), %k0
145 # ifdef USE_AS_STRNLEN
146 subq $CHAR_PER_VEC, %rdx
148 /* Save pointer before 4 x VEC_SIZE alignment. */
152 /* Align address to VEC_SIZE * 4 for loop. */
153 andq $-(VEC_SIZE * 4), %rax
155 # ifdef USE_AS_STRNLEN
157 # ifdef USE_AS_WCSLEN
160 /* rcx contains the number of [w]chars that will be recompared
161 due to the alignment fixes. rdx must be incremented by rcx to
162 offset the alignment adjustment. */
164 /* Need a jump as we don't want to add/subtract rdx for the first
165 iteration of the 4 x VEC_SIZE aligned loop. */
170 /* VPMINU and VPCMP combination provide better performance as
171 compared to alternative combinations. */
172 VMOVA (VEC_SIZE * 4)(%rax), %VMM(1)
173 VPMINU (VEC_SIZE * 5)(%rax), %VMM(1), %VMM(2)
174 VMOVA (VEC_SIZE * 6)(%rax), %VMM(3)
175 VPMINU (VEC_SIZE * 7)(%rax), %VMM(3), %VMM(4)
177 VPTESTN %VMM(2), %VMM(2), %k0
178 VPTESTN %VMM(4), %VMM(4), %k1
180 subq $-(VEC_SIZE * 4), %rax
183 # ifndef USE_AS_STRNLEN
187 subq $(CHAR_PER_VEC * 4), %rdx
195 VPTESTN %VMM(1), %VMM(1), %k2
201 /* At this point, if k0 is non zero, null char must be in the
206 VPTESTN %VMM(3), %VMM(3), %k3
210 /* At this point null [w]char must be in the fourth vector so no
214 /* Terminating on the fourth, third, or second vector is pretty
215 much the same; implemented this way to avoid branching and to
216 reuse code from the pre-loop exit condition. */
220 # ifdef USE_AS_WCSLEN
221 subq $-(VEC_SIZE * 3), %rax
225 leaq (VEC_SIZE * 3)(%rcx, %rax), %rax
227 # ifdef USE_AS_STRNLEN
236 # ifdef USE_AS_WCSLEN
237 subq $-(VEC_SIZE * 2), %rax
241 leaq (VEC_SIZE * 2)(%rcx, %rax), %rax
243 # ifdef USE_AS_STRNLEN
250 subq $-VEC_SIZE, %rax
254 # ifdef USE_AS_WCSLEN
258 # ifdef USE_AS_STRNLEN
267 andl $(VEC_SIZE - 1), %ecx
268 # ifdef USE_AS_WCSLEN
271 /* ecx contains the number of [w]chars to be skipped as a result
272 of the address alignment. */
273 andq $-VEC_SIZE, %rax
274 VPCMPEQ (%rax), %VMM(0), %k0
276 /* Ignore the number of characters for the alignment adjustment. */
278 # ifdef USE_AS_STRNLEN
279 jnz L(page_cross_end)
280 movl $CHAR_PER_VEC, %eax
290 # ifdef USE_AS_STRNLEN