1 /* memchr (str, chr, len) -- Return pointer to first occurrence of CHR in STR
2 less than LEN. For Intel 80x86, x>=3.
3 Copyright (C) 1994-2024 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
21 #include "asm-syntax.h"
23 #define PARMS 4+8 /* space for 2 saved regs */
32 /* Save callee-safe registers used in this function. */
34 cfi_adjust_cfa_offset (4)
36 cfi_adjust_cfa_offset (4)
37 cfi_rel_offset (edi, 0)
39 /* Load parameters into registers. */
40 movl STR(%esp), %eax /* str: pointer to memory block. */
41 movl CHR(%esp), %edx /* c: byte we are looking for. */
42 movl LEN(%esp), %esi /* len: length of memory block. */
43 cfi_rel_offset (esi, 4)
45 /* If my must not test more than three characters test
46 them one by one. This is especially true for 0. */
50 /* At the moment %edx contains CHR. What we need for the
51 algorithm is CHR in all bytes of the dword. Avoid
52 operations on 16 bit words because these require an
53 prefix byte (and one more cycle). */
54 movb %dl, %dh /* Now it is 0|0|c|c */
56 shll $16, %edx /* Now c|c|0|0 */
57 movw %cx, %dx /* And finally c|c|c|c */
59 /* Better performance can be achieved if the word (32
60 bit) memory access is aligned on a four-byte-boundary.
61 So process first bytes one by one until boundary is
62 reached. Don't use a loop for better performance. */
64 testb $3, %al /* correctly aligned ? */
65 je L(2) /* yes => begin loop */
66 cmpb %dl, (%eax) /* compare byte */
67 je L(9) /* target found => return */
68 incl %eax /* increment source pointer */
69 decl %esi /* decrement length counter */
70 je L(4) /* len==0 => return NULL */
72 testb $3, %al /* correctly aligned ? */
73 je L(2) /* yes => begin loop */
74 cmpb %dl, (%eax) /* compare byte */
75 je L(9) /* target found => return */
76 incl %eax /* increment source pointer */
77 decl %esi /* decrement length counter */
78 je L(4) /* len==0 => return NULL */
80 testb $3, %al /* correctly aligned ? */
81 je L(2) /* yes => begin loop */
82 cmpb %dl, (%eax) /* compare byte */
83 je L(9) /* target found => return */
84 incl %eax /* increment source pointer */
85 decl %esi /* decrement length counter */
86 /* no test for len==0 here, because this is done in the
90 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
91 change any of the hole bits of LONGWORD.
93 1) Is this safe? Will it catch all the zero bytes?
94 Suppose there is a byte with all zeros. Any carry bits
95 propagating from its left will fall into the hole at its
96 least significant bit and stop. Since there will be no
97 carry from its most significant bit, the LSB of the
98 byte to the left will be unchanged, and the zero will be
101 2) Is this worthwhile? Will it ignore everything except
102 zero bytes? Suppose every byte of LONGWORD has a bit set
103 somewhere. There will be a carry into bit 8. If bit 8
104 is set, this will carry into bit 16. If bit 8 is clear,
105 one of bits 9-15 must be set, so there will be a carry
106 into bit 16. Similarly, there will be a carry into bit
107 24. If one of bits 24-31 is set, there will be a carry
108 into bit 32 (=carry flag), so all of the hole bits will
111 3) But wait! Aren't we looking for CHR, not zero?
112 Good point. So what we do is XOR LONGWORD with a longword,
113 each of whose bytes is CHR. This turns each byte that is CHR
117 /* Each round the main loop processes 16 bytes. */
121 L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
122 movl $0xfefefeff, %edi /* magic value */
123 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
125 addl %ecx, %edi /* add the magic value to the word. We get
126 carry bits reported for each byte which
129 /* According to the algorithm we had to reverse the effect of the
130 XOR first and then test the overflow bits. But because the
131 following XOR would destroy the carry flag and it would (in a
132 representation with more than 32 bits) not alter then last
133 overflow, we can now test this condition. If no carry is signaled
134 no overflow must have occurred in the last byte => it was 0. */
137 /* We are only interested in carry bits that change due to the
138 previous add, so remove original bits */
139 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
141 /* Now test for the other three overflow bits. */
142 orl $0xfefefeff, %edi /* set all non-carry bits */
143 incl %edi /* add 1: if one carry bit was *not* set
144 the addition will not result in 0. */
146 /* If at least one byte of the word is CHR we don't get 0 in %edi. */
147 jnz L(8) /* found it => return pointer */
149 /* This process is unfolded four times for better performance.
150 we don't increment the source pointer each time. Instead we
151 use offsets and increment by 16 in each run of the loop. But
152 before probing for the matching byte we need some extra code
153 (following LL(13) below). Even the len can be compared with
154 constants instead of decrementing each time. */
156 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
157 movl $0xfefefeff, %edi /* magic value */
158 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
160 addl %ecx, %edi /* add the magic value to the word. We get
161 carry bits reported for each byte which
163 jnc L(7) /* highest byte is CHR => return pointer */
164 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
165 orl $0xfefefeff, %edi /* set all non-carry bits */
166 incl %edi /* add 1: if one carry bit was *not* set
167 the addition will not result in 0. */
168 jnz L(7) /* found it => return pointer */
170 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
171 movl $0xfefefeff, %edi /* magic value */
172 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
174 addl %ecx, %edi /* add the magic value to the word. We get
175 carry bits reported for each byte which
177 jnc L(6) /* highest byte is CHR => return pointer */
178 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
179 orl $0xfefefeff, %edi /* set all non-carry bits */
180 incl %edi /* add 1: if one carry bit was *not* set
181 the addition will not result in 0. */
182 jnz L(6) /* found it => return pointer */
184 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
185 movl $0xfefefeff, %edi /* magic value */
186 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
188 addl %ecx, %edi /* add the magic value to the word. We get
189 carry bits reported for each byte which
191 jnc L(5) /* highest byte is CHR => return pointer */
192 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
193 orl $0xfefefeff, %edi /* set all non-carry bits */
194 incl %edi /* add 1: if one carry bit was *not* set
195 the addition will not result in 0. */
196 jnz L(5) /* found it => return pointer */
198 /* Adjust both counters for a full round, i.e. 16 bytes. */
201 jae L(1) /* Still more than 16 bytes remaining */
203 /* Process remaining bytes separately. */
204 cmpl $4-16, %esi /* rest < 4 bytes? */
205 jb L(3) /* yes, than test byte by byte */
207 movl (%eax), %ecx /* get word (= 4 bytes) in question */
208 movl $0xfefefeff, %edi /* magic value */
209 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
211 addl %ecx, %edi /* add the magic value to the word. We get
212 carry bits reported for each byte which
214 jnc L(8) /* highest byte is CHR => return pointer */
215 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
216 orl $0xfefefeff, %edi /* set all non-carry bits */
217 incl %edi /* add 1: if one carry bit was *not* set
218 the addition will not result in 0. */
219 jne L(8) /* found it => return pointer */
220 addl $4, %eax /* adjust source pointer */
222 cmpl $8-16, %esi /* rest < 8 bytes? */
223 jb L(3) /* yes, than test byte by byte */
225 movl (%eax), %ecx /* get word (= 4 bytes) in question */
226 movl $0xfefefeff, %edi /* magic value */
227 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
229 addl %ecx, %edi /* add the magic value to the word. We get
230 carry bits reported for each byte which
232 jnc L(8) /* highest byte is CHR => return pointer */
233 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
234 orl $0xfefefeff, %edi /* set all non-carry bits */
235 incl %edi /* add 1: if one carry bit was *not* set
236 the addition will not result in 0. */
237 jne L(8) /* found it => return pointer */
238 addl $4, %eax /* adjust source pointer */
240 cmpl $12-16, %esi /* rest < 12 bytes? */
241 jb L(3) /* yes, than test byte by byte */
243 movl (%eax), %ecx /* get word (= 4 bytes) in question */
244 movl $0xfefefeff, %edi /* magic value */
245 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
247 addl %ecx, %edi /* add the magic value to the word. We get
248 carry bits reported for each byte which
250 jnc L(8) /* highest byte is CHR => return pointer */
251 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
252 orl $0xfefefeff, %edi /* set all non-carry bits */
253 incl %edi /* add 1: if one carry bit was *not* set
254 the addition will not result in 0. */
255 jne L(8) /* found it => return pointer */
256 addl $4, %eax /* adjust source pointer */
258 /* Check the remaining bytes one by one. */
259 L(3): andl $3, %esi /* mask out uninteresting bytes */
260 jz L(4) /* no remaining bytes => return NULL */
262 cmpb %dl, (%eax) /* compare byte with CHR */
263 je L(9) /* equal, than return pointer */
264 incl %eax /* increment source pointer */
265 decl %esi /* decrement length */
266 jz L(4) /* no remaining bytes => return NULL */
268 cmpb %dl, (%eax) /* compare byte with CHR */
269 je L(9) /* equal, than return pointer */
270 incl %eax /* increment source pointer */
271 decl %esi /* decrement length */
272 jz L(4) /* no remaining bytes => return NULL */
274 cmpb %dl, (%eax) /* compare byte with CHR */
275 je L(9) /* equal, than return pointer */
277 L(4): /* no byte found => return NULL */
281 /* add missing source pointer increments */
286 /* Test for the matching byte in the word. %ecx contains a NUL
287 char in the byte which originally was the byte we are looking
289 L(8): testb %cl, %cl /* test first byte in dword */
290 jz L(9) /* if zero => return pointer */
291 incl %eax /* increment source pointer */
293 testb %ch, %ch /* test second byte in dword */
294 jz L(9) /* if zero => return pointer */
295 incl %eax /* increment source pointer */
297 testl $0xff0000, %ecx /* test third byte in dword */
298 jz L(9) /* if zero => return pointer */
299 incl %eax /* increment source pointer */
301 /* No further test needed we we know it is one of the four bytes. */
302 L(9): popl %edi /* pop saved registers */
303 cfi_adjust_cfa_offset (-4)
306 cfi_adjust_cfa_offset (-4)
312 weak_alias (__memchr, memchr)
313 libc_hidden_builtin_def (memchr)