1 /* strcspn with SSE4.2 intrinsics
2 Copyright (C) 2009-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <isa-level.h>
20 #if IS_IN (libc) || MINIMUM_X86_ISA_LEVEL >= 2
22 # include <nmmintrin.h>
24 # include "varshift.h"
29 | _SIDD_POSITIVE_POLARITY
30 | _SIDD_LEAST_SIGNIFICANT
31 on pcmpistri to compare xmm/mem128
33 0 1 2 3 4 5 6 7 8 9 A B C D E F
34 X X X X X X X X X X X X X X X X
38 0 1 2 3 4 5 6 7 8 9 A B C D E F
39 A A A A A A A A A A A A A A A A
41 to find out if the first 16-byte data element has any byte A and
42 the offset of the first such byte. There are 3 cases:
44 1. The first 16-byte data element has the byte A at the offset X.
45 2. The first 16-byte data element has EOS and doesn't have the byte A.
46 3. The first 16-byte data element is fully valid (no EOS) and doesn't have the byte A.
48 Here is the table of ECX, CFlag, ZFlag and SFlag for these 3 cases:
54 We exit from the loop for cases 1 and 2 with jbe which branches
55 when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
59 # define STRCSPN __strcspn_sse42
61 # ifndef STRCSPN_GENERIC
62 # define STRCSPN_GENERIC __strcspn_generic
65 # ifdef USE_AS_STRPBRK
66 # define RETURN(val1, val2) return val1
68 # define RETURN(val1, val2) return val2
72 # ifdef USE_AS_STRPBRK
77 STRCSPN_GENERIC (const char *, const char *) attribute_hidden
;
80 # ifdef USE_AS_STRPBRK
85 __attribute__ ((section (".text.sse4.2")))
86 STRCSPN (const char *s
, const char *a
)
89 RETURN (NULL
, strlen (s
));
92 __m128i mask
, maskz
, zero
;
93 unsigned int maskz_bits
;
94 unsigned int offset
= (unsigned int) ((size_t) a
& 15);
95 zero
= _mm_set1_epi8 (0);
99 aligned
= (const char *) ((size_t) a
& -16L);
100 __m128i mask0
= _mm_load_si128 ((__m128i
*) aligned
);
101 maskz
= _mm_cmpeq_epi8 (mask0
, zero
);
103 /* Find where the NUL terminator is. */
104 maskz_bits
= _mm_movemask_epi8 (maskz
) >> offset
;
107 mask
= __m128i_shift_right (mask0
, offset
);
108 offset
= (unsigned int) ((size_t) s
& 15);
110 goto start_unaligned
;
118 mask
= _mm_loadu_si128 ((__m128i
*) a
);
119 /* Find where the NUL terminator is. */
120 maskz
= _mm_cmpeq_epi8 (mask
, zero
);
121 maskz_bits
= _mm_movemask_epi8 (maskz
);
124 /* There is no NUL terminator. Don't use SSE4.2 if the length
127 return STRCSPN_GENERIC (s
, a
);
131 offset
= (unsigned int) ((size_t) s
& 15);
135 /* Check partial string. */
136 aligned
= (const char *) ((size_t) s
& -16L);
137 __m128i value
= _mm_load_si128 ((__m128i
*) aligned
);
139 value
= __m128i_shift_right (value
, offset
);
141 unsigned int length
= _mm_cmpistri (mask
, value
, 0x2);
142 /* No need to check ZFlag since ZFlag is always 1. */
143 unsigned int cflag
= _mm_cmpistrc (mask
, value
, 0x2);
145 RETURN ((char *) (s
+ length
), length
);
146 /* Find where the NUL terminator is. */
147 unsigned int index
= _mm_cmpistri (value
, value
, 0x3a);
148 if (index
< 16 - offset
)
149 RETURN (NULL
, index
);
156 __m128i value
= _mm_load_si128 ((__m128i
*) aligned
);
157 unsigned int index
= _mm_cmpistri (mask
, value
, 0x2);
158 unsigned int cflag
= _mm_cmpistrc (mask
, value
, 0x2);
159 unsigned int zflag
= _mm_cmpistrz (mask
, value
, 0x2);
161 RETURN ((char *) (aligned
+ index
), (size_t) (aligned
+ index
- s
));
164 /* Find where the NUL terminator is. */
165 (size_t) (aligned
+ _mm_cmpistri (value
, value
, 0x3a) - s
));