1 /* strlen used for beginning of str{n}cat using EVEX 256/512.
2 Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
20 /* NOTE: This file is meant to be included by strcat-evex or
21 strncat-evex and cannot stand alone. Before it is included, %rdi
22 must be saved in %rax. */
25 /* Simple strlen implementation that ends at
26 L(strcat_strlen_done). */
/* Overview: compare successive VEC_SIZE-wide chunks of memory against an
   all-zero vector, branch to an L(bsf_and_done_v*) target when a chunk
   reports a match, and advance %rdi by %rcx characters on the way to
   L(strcat_strlen_done).

   NOTE(review): as shown here, several jnz instructions are not preceded
   by any visible flag-setting instruction, %r8 and %VRCX are read before
   any visible write, and the L(bsf_and_done_v*) targets are not defined
   in this span.  The listing may be incomplete -- confirm against the
   full file before relying on the comments below.  */

/* Zero the register (xor of a register with itself).  Presumably
   VZERO_128 is the 128-bit view of VZERO -- TODO confirm.  */
27 vpxorq %VZERO_128, %VZERO_128, %VZERO_128
/* Clear the low bits of %r8, i.e. round it down to a multiple of
   VEC_SIZE (assumes VEC_SIZE is a power of two).  */
29 andq $(VEC_SIZE * -1), %r8
/* Compare the first aligned vector against VZERO; the per-element result
   goes to mask register %k0.  */
30 VPCMPEQ (%r8), %VZERO, %k0
/* Shift %VRCX right by the count in %VRDI.  Presumably this discards
   match bits for characters that precede the real start of the string
   inside the aligned vector -- TODO confirm.  shrx does not write
   flags, so the jnz below relies on a test not visible here.  */
36 shrx %VRDI, %VRCX, %VRCX
41 jnz L(bsf_and_done_v0)
/* Second vector, at %r8 + VEC_SIZE.  */
44 VPCMPEQ VEC_SIZE(%r8), %VZERO, %k0
/* %rdi = %r8 + VEC_SIZE; lea leaves the flags untouched.  */
46 leaq (VEC_SIZE)(%r8), %rdi
48 jnz L(bsf_and_done_v0)
/* Vectors at %r8 + 2, 3 and 4 * VEC_SIZE; each has its own exit
   target (v1, v2, v3).  */
50 VPCMPEQ (VEC_SIZE * 2)(%r8), %VZERO, %k0
53 jnz L(bsf_and_done_v1)
55 VPCMPEQ (VEC_SIZE * 3)(%r8), %VZERO, %k0
58 jnz L(bsf_and_done_v2)
60 VPCMPEQ (VEC_SIZE * 4)(%r8), %VZERO, %k0
63 jnz L(bsf_and_done_v3)
/* Round %rdi down to a multiple of VEC_SIZE * 4 so the aligned loads
   (VMOVA) below operate on a 4-vector-aligned base.  */
65 andq $-(VEC_SIZE * 4), %rdi
/* Process four vectors at offsets 4..7 * VEC_SIZE from %rdi.  VPMIN
   folds each pair together: VMM(1) combines vectors 4 and 5, VMM(3)
   combines vectors 6 and 7.  NOTE(review): this looks like a loop body,
   but no loop label or back-branch is visible in this span.  */
68 VMOVA (VEC_SIZE * 4)(%rdi), %VMM(0)
69 VPMIN (VEC_SIZE * 5)(%rdi), %VMM(0), %VMM(1)
70 VMOVA (VEC_SIZE * 6)(%rdi), %VMM(2)
71 VPMIN (VEC_SIZE * 7)(%rdi), %VMM(2), %VMM(3)
/* Test the folded vectors into masks %k1 and %k3.  Presumably VPTESTN
   expands to a vptestnm variant, which sets a mask bit for each zero
   element -- TODO confirm.  */
72 VPTESTN %VMM(1), %VMM(1), %k1
73 VPTESTN %VMM(3), %VMM(3), %k3
/* Subtracting a negative: advances %rdi by VEC_SIZE * 4.  Presumably
   written this way so the immediate can fit in a signed byte when
   VEC_SIZE * 4 is 128 -- TODO confirm.  */
74 subq $(VEC_SIZE * -4), %rdi
/* Work out which of the four vectors matched: re-test VMM(0) alone
   (v0 exit), then the v1 exit, then VMM(2) alone (v2 exit).  */
78 VPTESTN %VMM(0), %VMM(0), %k0
81 jnz L(bsf_and_done_v0)
85 jnz L(bsf_and_done_v1)
87 VPTESTN %VMM(2), %VMM(2), %k0
90 jnz L(bsf_and_done_v2)
/* %rdi = %rdi + %rcx * CHAR_SIZE + VEC_SIZE * 2, then finish.  */
97 leaq (VEC_SIZE * 2)(%rdi, %rcx, CHAR_SIZE), %rdi
98 jmp L(strcat_strlen_done)
/* %rdi = %rdi + %rcx * CHAR_SIZE.  NOTE(review): this follows an
   unconditional jmp with no visible label, so it is unreachable as
   shown; presumably it belongs to one of the L(bsf_and_done_v*)
   targets -- confirm.  */
106 leaq (%rdi, %rcx, CHAR_SIZE), %rdi
/* Common exit label of the fragment; the including file continues
   from here.  */
110 L(strcat_strlen_done):