1 /* memchr (str, chr, len) -- Return pointer to first occurrence of CHR in STR less
4 Copyright (C) 1994-1998, 2000, 2003 Free Software Foundation, Inc.
5 This file is part of the GNU C Library.
6 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
7 Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
8 This version is developed using the same algorithm as the fast C
9 version which carries the following introduction:
10 Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
11 with help from Dan Sahlin (dan@sics.se) and
12 commentary by Jim Blandy (jimb@ai.mit.edu);
13 adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
14 and implemented by Roland McGrath (roland@ai.mit.edu).
16 The GNU C Library is free software; you can redistribute it and/or
17 modify it under the terms of the GNU Lesser General Public
18 License as published by the Free Software Foundation; either
19 version 2.1 of the License, or (at your option) any later version.
21 The GNU C Library is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 Lesser General Public License for more details.
26 You should have received a copy of the GNU Lesser General Public
27 License along with the GNU C Library; if not, write to the Free
28 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
32 #include "asm-syntax.h"
36 #define PARMS LINKAGE+8 /* space for 2 saved regs */
38 #define STR RTN+RTN_SIZE
39 #define CHR STR+PTR_SIZE
43 ENTRY (BP_SYM (__memchr))
46 /* Save callee-safe registers used in this function. */
50 /* Load parameters into registers. */
51 movl STR(%esp), %eax /* str: pointer to memory block. */
52 movl CHR(%esp), %edx /* c: byte we are looking for. */
53 movl LEN(%esp), %esi /* len: length of memory block. */
54 CHECK_BOUNDS_LOW (%eax, STR(%esp))
56 /* If my must not test more than three characters test
57 them one by one. This is especially true for 0. */
61 /* At the moment %edx contains CHR. What we need for the
62 algorithm is CHR in all bytes of the dword. Avoid
63 operations on 16 bit words because these require an
64 prefix byte (and one more cycle). */
65 movb %dl, %dh /* Now it is 0|0|c|c */
67 shll $16, %edx /* Now c|c|0|0 */
68 movw %cx, %dx /* And finally c|c|c|c */
70 /* Better performance can be achieved if the word (32
71 bit) memory access is aligned on a four-byte-boundary.
72 So process first bytes one by one until boundary is
73 reached. Don't use a loop for better performance. */
75 testb $3, %al /* correctly aligned ? */
76 je L(2) /* yes => begin loop */
77 cmpb %dl, (%eax) /* compare byte */
78 je L(9) /* target found => return */
79 incl %eax /* increment source pointer */
80 decl %esi /* decrement length counter */
81 je L(4) /* len==0 => return NULL */
83 testb $3, %al /* correctly aligned ? */
84 je L(2) /* yes => begin loop */
85 cmpb %dl, (%eax) /* compare byte */
86 je L(9) /* target found => return */
87 incl %eax /* increment source pointer */
88 decl %esi /* decrement length counter */
89 je L(4) /* len==0 => return NULL */
91 testb $3, %al /* correctly aligned ? */
92 je L(2) /* yes => begin loop */
93 cmpb %dl, (%eax) /* compare byte */
94 je L(9) /* target found => return */
95 incl %eax /* increment source pointer */
96 decl %esi /* decrement length counter */
97 /* no test for len==0 here, because this is done in the
101 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
102 change any of the hole bits of LONGWORD.
104 1) Is this safe? Will it catch all the zero bytes?
105 Suppose there is a byte with all zeros. Any carry bits
106 propagating from its left will fall into the hole at its
107 least significant bit and stop. Since there will be no
108 carry from its most significant bit, the LSB of the
109 byte to the left will be unchanged, and the zero will be
112 2) Is this worthwhile? Will it ignore everything except
113 zero bytes? Suppose every byte of LONGWORD has a bit set
114 somewhere. There will be a carry into bit 8. If bit 8
115 is set, this will carry into bit 16. If bit 8 is clear,
116 one of bits 9-15 must be set, so there will be a carry
117 into bit 16. Similarly, there will be a carry into bit
118 24. If one of bits 24-31 is set, there will be a carry
119 into bit 32 (=carry flag), so all of the hole bits will
122 3) But wait! Aren't we looking for CHR, not zero?
123 Good point. So what we do is XOR LONGWORD with a longword,
124 each of whose bytes is CHR. This turns each byte that is CHR
128 /* Each round the main loop processes 16 bytes. */
132 L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
133 movl $0xfefefeff, %edi /* magic value */
134 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
136 addl %ecx, %edi /* add the magic value to the word. We get
137 carry bits reported for each byte which
140 /* According to the algorithm we had to reverse the effect of the
141 XOR first and then test the overflow bits. But because the
142 following XOR would destroy the carry flag and it would (in a
143 representation with more than 32 bits) not alter then last
144 overflow, we can now test this condition. If no carry is signaled
145 no overflow must have occurred in the last byte => it was 0. */
148 /* We are only interested in carry bits that change due to the
149 previous add, so remove original bits */
150 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
152 /* Now test for the other three overflow bits. */
153 orl $0xfefefeff, %edi /* set all non-carry bits */
154 incl %edi /* add 1: if one carry bit was *not* set
155 the addition will not result in 0. */
157 /* If at least one byte of the word is CHR we don't get 0 in %edi. */
158 jnz L(8) /* found it => return pointer */
160 /* This process is unfolded four times for better performance.
161 we don't increment the source pointer each time. Instead we
162 use offsets and increment by 16 in each run of the loop. But
163 before probing for the matching byte we need some extra code
164 (following LL(13) below). Even the len can be compared with
165 constants instead of decrementing each time. */
167 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
168 movl $0xfefefeff, %edi /* magic value */
169 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
171 addl %ecx, %edi /* add the magic value to the word. We get
172 carry bits reported for each byte which
174 jnc L(7) /* highest byte is CHR => return pointer */
175 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
176 orl $0xfefefeff, %edi /* set all non-carry bits */
177 incl %edi /* add 1: if one carry bit was *not* set
178 the addition will not result in 0. */
179 jnz L(7) /* found it => return pointer */
181 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
182 movl $0xfefefeff, %edi /* magic value */
183 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
185 addl %ecx, %edi /* add the magic value to the word. We get
186 carry bits reported for each byte which
188 jnc L(6) /* highest byte is CHR => return pointer */
189 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
190 orl $0xfefefeff, %edi /* set all non-carry bits */
191 incl %edi /* add 1: if one carry bit was *not* set
192 the addition will not result in 0. */
193 jnz L(6) /* found it => return pointer */
195 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
196 movl $0xfefefeff, %edi /* magic value */
197 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
199 addl %ecx, %edi /* add the magic value to the word. We get
200 carry bits reported for each byte which
202 jnc L(5) /* highest byte is CHR => return pointer */
203 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
204 orl $0xfefefeff, %edi /* set all non-carry bits */
205 incl %edi /* add 1: if one carry bit was *not* set
206 the addition will not result in 0. */
207 jnz L(5) /* found it => return pointer */
209 /* Adjust both counters for a full round, i.e. 16 bytes. */
212 jae L(1) /* Still more than 16 bytes remaining */
214 /* Process remaining bytes separately. */
215 cmpl $4-16, %esi /* rest < 4 bytes? */
216 jb L(3) /* yes, than test byte by byte */
218 movl (%eax), %ecx /* get word (= 4 bytes) in question */
219 movl $0xfefefeff, %edi /* magic value */
220 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
222 addl %ecx, %edi /* add the magic value to the word. We get
223 carry bits reported for each byte which
225 jnc L(8) /* highest byte is CHR => return pointer */
226 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
227 orl $0xfefefeff, %edi /* set all non-carry bits */
228 incl %edi /* add 1: if one carry bit was *not* set
229 the addition will not result in 0. */
230 jne L(8) /* found it => return pointer */
231 addl $4, %eax /* adjust source pointer */
233 cmpl $8-16, %esi /* rest < 8 bytes? */
234 jb L(3) /* yes, than test byte by byte */
236 movl (%eax), %ecx /* get word (= 4 bytes) in question */
237 movl $0xfefefeff, %edi /* magic value */
238 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
240 addl %ecx, %edi /* add the magic value to the word. We get
241 carry bits reported for each byte which
243 jnc L(8) /* highest byte is CHR => return pointer */
244 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
245 orl $0xfefefeff, %edi /* set all non-carry bits */
246 incl %edi /* add 1: if one carry bit was *not* set
247 the addition will not result in 0. */
248 jne L(8) /* found it => return pointer */
249 addl $4, %eax /* adjust source pointer */
251 cmpl $12-16, %esi /* rest < 12 bytes? */
252 jb L(3) /* yes, than test byte by byte */
254 movl (%eax), %ecx /* get word (= 4 bytes) in question */
255 movl $0xfefefeff, %edi /* magic value */
256 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
258 addl %ecx, %edi /* add the magic value to the word. We get
259 carry bits reported for each byte which
261 jnc L(8) /* highest byte is CHR => return pointer */
262 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
263 orl $0xfefefeff, %edi /* set all non-carry bits */
264 incl %edi /* add 1: if one carry bit was *not* set
265 the addition will not result in 0. */
266 jne L(8) /* found it => return pointer */
267 addl $4, %eax /* adjust source pointer */
269 /* Check the remaining bytes one by one. */
270 L(3): andl $3, %esi /* mask out uninteresting bytes */
271 jz L(4) /* no remaining bytes => return NULL */
273 cmpb %dl, (%eax) /* compare byte with CHR */
274 je L(9) /* equal, than return pointer */
275 incl %eax /* increment source pointer */
276 decl %esi /* decrement length */
277 jz L(4) /* no remaining bytes => return NULL */
279 cmpb %dl, (%eax) /* compare byte with CHR */
280 je L(9) /* equal, than return pointer */
281 incl %eax /* increment source pointer */
282 decl %esi /* decrement length */
283 jz L(4) /* no remaining bytes => return NULL */
285 cmpb %dl, (%eax) /* compare byte with CHR */
286 je L(9) /* equal, than return pointer */
288 L(4): /* no byte found => return NULL */
292 /* add missing source pointer increments */
297 /* Test for the matching byte in the word. %ecx contains a NUL
298 char in the byte which originally was the byte we are looking
300 L(8): testb %cl, %cl /* test first byte in dword */
301 jz L(9) /* if zero => return pointer */
302 incl %eax /* increment source pointer */
304 testb %ch, %ch /* test second byte in dword */
305 jz L(9) /* if zero => return pointer */
306 incl %eax /* increment source pointer */
308 testl $0xff0000, %ecx /* test third byte in dword */
309 jz L(9) /* if zero => return pointer */
310 incl %eax /* increment source pointer */
312 /* No further test needed we we know it is one of the four bytes. */
314 #if __BOUNDED_POINTERS__
315 CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
316 /* If RTN pointer is phony, don't copy return value into it. */
320 RETURN_BOUNDED_POINTER (STR(%esp))
322 L(pop): popl %edi /* pop saved registers */
327 END (BP_SYM (__memchr))
329 weak_alias (BP_SYM (__memchr), BP_SYM (memchr))
330 #if !__BOUNDED_POINTERS__
331 weak_alias (__memchr, __ubp_memchr)
333 libc_hidden_builtin_def (memchr)