sparc64: Remove unwind information from signal return stubs [BZ#31244]
[glibc.git] / sysdeps / x86_64 / multiarch / strspn-sse4.c
blobc9a5684fe669d19d84fbd44fd3f054f690163f8c
1 /* strspn with SSE4.2 intrinsics
2 Copyright (C) 2009-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <nmmintrin.h>
20 #include <string.h>
21 #include "varshift.h"
23 /* We use 0x12:
24 _SIDD_SBYTE_OPS
25 | _SIDD_CMP_EQUAL_ANY
26 | _SIDD_NEGATIVE_POLARITY
27 | _SIDD_LEAST_SIGNIFICANT
28 on pcmpistri to compare xmm/mem128
30 0 1 2 3 4 5 6 7 8 9 A B C D E F
31 X X X X X X X X X X X X X X X X
33 against xmm
35 0 1 2 3 4 5 6 7 8 9 A B C D E F
36 A A A A A A A A A A A A A A A A
38 to find out whether the first 16-byte data element has any non-A byte
39 and, if so, the offset of the first such byte. There are 2 cases:
41 1. The first 16-byte data element has a non-A byte, including
42 EOS, at the offset X.
43 2. The first 16-byte data element is valid and doesn't have a non-A
44 byte.
46 Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
48 case ECX CFlag ZFlag SFlag
49 1 X 1 0/1 0
50 2 16 0 0 0
52 We exit from the loop for case 1. */
/* Fallback implementation.  STRSPN below forwards to it when A is
   longer than 16 bytes and the SSE4.2 code is not used.  */
54 extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
/* Name under which the function below is defined.  Defining STRSPN
   before this point overrides the default, __strspn_sse42.  */
56 #ifndef STRSPN
57 # define STRSPN __strspn_sse42
58 #endif
/* Return the length of the initial segment of S made up entirely of
   bytes that occur in A.

   An empty A yields 0.  If the first 16 bytes of A hold no NUL and
   A[16] is nonzero (A is longer than 16 bytes), the call is forwarded
   to __strspn_generic.  Otherwise all of A is held in the single
   vector MASK and S is scanned 16 bytes at a time with pcmpistri in
   mode 0x12.  */
60 size_t
61 __attribute__ ((section (".text.sse4.2")))
62 STRSPN (const char *s, const char *a)
64 if (*a == 0)
65 return 0;
67 const char *aligned;
68 __m128i mask, maskz, zero;
69 unsigned int maskz_bits;
/* Byte offset of A inside its 16-byte aligned block.  */
70 unsigned int offset = (int) ((size_t) a & 15);
71 zero = _mm_set1_epi8 (0);
72 if (offset != 0)
/* A is not 16-byte aligned: do an aligned load of the 16-byte block
   that contains the start of A.  */
74 /* Load masks. */
75 aligned = (const char *) ((size_t) a & -16L);
76 __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
77 maskz = _mm_cmpeq_epi8 (mask0, zero);
79 /* Find where the NULL terminator is. */
/* The movemask result has one bit per byte of the block; shifting it
   right by OFFSET drops the bits for the bytes that precede A.  */
80 maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
81 if (maskz_bits != 0)
/* A's terminator lies inside this block, so the block holds all of A.
   NOTE(review): __m128i_shift_right comes from varshift.h; presumably
   it shifts MASK0 right by OFFSET bytes so that A starts at byte 0 of
   MASK -- confirm there.  */
83 mask = __m128i_shift_right (mask0, offset);
/* OFFSET is reused: from here on it is the offset of S inside its
   16-byte block.  */
84 offset = (unsigned int) ((size_t) s & 15);
85 if (offset)
86 goto start_unaligned;
88 aligned = s;
89 goto start_loop;
93 /* Reached when A is 16-byte aligned, or when A is unaligned but its
   terminator is not in the aligned block examined above.  The load is
   an unaligned one because of the second case.  */
94 mask = _mm_loadu_si128 ((__m128i *) a);
96 /* Find where the NULL terminator is. */
97 maskz = _mm_cmpeq_epi8 (mask, zero);
98 maskz_bits = _mm_movemask_epi8 (maskz);
99 if (maskz_bits == 0)
101 /* There is no NULL terminator. Don't use SSE4.2 if the length
102 of A > 16. */
/* MASK holds 16 non-NUL bytes here, so A[16] is still part of the
   string (its terminator at the latest).  */
103 if (a[16] != 0)
104 return __strspn_generic (s, a);
106 aligned = s;
107 offset = (unsigned int) ((size_t) s & 15);
109 if (offset != 0)
/* S is not 16-byte aligned.  The label is also the target of the goto
   in the unaligned-A path above.  */
111 start_unaligned:
112 /* Check partial string. */
113 aligned = (const char *) ((size_t) s & -16L);
114 __m128i value = _mm_load_si128 ((__m128i *) aligned);
/* NOTE(review): ADJ_VALUE is presumably VALUE shifted right by OFFSET
   bytes, i.e. the 16 - OFFSET bytes that belong to S followed by zero
   bytes -- confirm in varshift.h.  */
115 __m128i adj_value = __m128i_shift_right (value, offset);
117 unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
118 /* No need to check CFlag since it is always 1. */
/* An index below 16 - OFFSET falls within the bytes taken from S, so
   it is the result.  */
119 if (length < 16 - offset)
120 return length;
121 /* Find where the NULL terminator is. */
122 maskz = _mm_cmpeq_epi8 (value, zero);
/* Bits for the bytes that precede S are shifted out; any bit left
   means S ends inside this block.  */
123 maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
124 if (maskz_bits != 0)
125 return length;
126 aligned += 16;
/* From here on S is read in aligned 16-byte blocks.  */
129 start_loop:
130 while (1)
132 __m128i value = _mm_load_si128 ((__m128i *) aligned);
133 unsigned int index = _mm_cmpistri (mask, value, 0x12);
/* CFlag set means this block holds a non-A byte or the end of S
   (case 1 in the table at the top of the file); INDEX is that byte's
   position within the block.  */
134 unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
135 if (cflag)
136 return (size_t) (aligned + index - s);
137 aligned += 16;