1 /* memchr (str, chr, len) -- Return pointer to first occurrence of CHR in STR
2 less than LEN. For Intel 80x86, x>=3.
3 Copyright (C) 1994-1998, 2000, 2003, 2005 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
6 Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
7 This version is developed using the same algorithm as the fast C
8 version which carries the following introduction:
9 Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
10 with help from Dan Sahlin (dan@sics.se) and
11 commentary by Jim Blandy (jimb@ai.mit.edu);
12 adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
13 and implemented by Roland McGrath (roland@ai.mit.edu).
15 The GNU C Library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 2.1 of the License, or (at your option) any later version.
20 The GNU C Library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with the GNU C Library; if not, write to the Free
27 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
31 #include "asm-syntax.h"
35 #define PARMS LINKAGE+8 /* space for 2 saved regs */
37 #define STR RTN+RTN_SIZE
38 #define CHR STR+PTR_SIZE
42 ENTRY (BP_SYM (__memchr))
45 /* Save callee-safe registers used in this function. */
47 cfi_adjust_cfa_offset (4)
49 cfi_adjust_cfa_offset (4)
50 cfi_rel_offset (edi, 0)
52 /* Load parameters into registers. */
53 movl STR(%esp), %eax /* str: pointer to memory block. */
54 movl CHR(%esp), %edx /* c: byte we are looking for. */
55 movl LEN(%esp), %esi /* len: length of memory block. */
56 cfi_rel_offset (esi, 4)
57 CHECK_BOUNDS_LOW (%eax, STR(%esp))
59 /* If my must not test more than three characters test
60 them one by one. This is especially true for 0. */
64 /* At the moment %edx contains CHR. What we need for the
65 algorithm is CHR in all bytes of the dword. Avoid
66 operations on 16 bit words because these require an
67 prefix byte (and one more cycle). */
68 movb %dl, %dh /* Now it is 0|0|c|c */
70 shll $16, %edx /* Now c|c|0|0 */
71 movw %cx, %dx /* And finally c|c|c|c */
73 /* Better performance can be achieved if the word (32
74 bit) memory access is aligned on a four-byte-boundary.
75 So process first bytes one by one until boundary is
76 reached. Don't use a loop for better performance. */
78 testb $3, %al /* correctly aligned ? */
79 je L(2) /* yes => begin loop */
80 cmpb %dl, (%eax) /* compare byte */
81 je L(9) /* target found => return */
82 incl %eax /* increment source pointer */
83 decl %esi /* decrement length counter */
84 je L(4) /* len==0 => return NULL */
86 testb $3, %al /* correctly aligned ? */
87 je L(2) /* yes => begin loop */
88 cmpb %dl, (%eax) /* compare byte */
89 je L(9) /* target found => return */
90 incl %eax /* increment source pointer */
91 decl %esi /* decrement length counter */
92 je L(4) /* len==0 => return NULL */
94 testb $3, %al /* correctly aligned ? */
95 je L(2) /* yes => begin loop */
96 cmpb %dl, (%eax) /* compare byte */
97 je L(9) /* target found => return */
98 incl %eax /* increment source pointer */
99 decl %esi /* decrement length counter */
100 /* no test for len==0 here, because this is done in the
104 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
105 change any of the hole bits of LONGWORD.
107 1) Is this safe? Will it catch all the zero bytes?
108 Suppose there is a byte with all zeros. Any carry bits
109 propagating from its left will fall into the hole at its
110 least significant bit and stop. Since there will be no
111 carry from its most significant bit, the LSB of the
112 byte to the left will be unchanged, and the zero will be
115 2) Is this worthwhile? Will it ignore everything except
116 zero bytes? Suppose every byte of LONGWORD has a bit set
117 somewhere. There will be a carry into bit 8. If bit 8
118 is set, this will carry into bit 16. If bit 8 is clear,
119 one of bits 9-15 must be set, so there will be a carry
120 into bit 16. Similarly, there will be a carry into bit
121 24. If one of bits 24-31 is set, there will be a carry
122 into bit 32 (=carry flag), so all of the hole bits will
125 3) But wait! Aren't we looking for CHR, not zero?
126 Good point. So what we do is XOR LONGWORD with a longword,
127 each of whose bytes is CHR. This turns each byte that is CHR
131 /* Each round the main loop processes 16 bytes. */
135 L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
136 movl $0xfefefeff, %edi /* magic value */
137 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
139 addl %ecx, %edi /* add the magic value to the word. We get
140 carry bits reported for each byte which
143 /* According to the algorithm we had to reverse the effect of the
144 XOR first and then test the overflow bits. But because the
145 following XOR would destroy the carry flag and it would (in a
146 representation with more than 32 bits) not alter then last
147 overflow, we can now test this condition. If no carry is signaled
148 no overflow must have occurred in the last byte => it was 0. */
151 /* We are only interested in carry bits that change due to the
152 previous add, so remove original bits */
153 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
155 /* Now test for the other three overflow bits. */
156 orl $0xfefefeff, %edi /* set all non-carry bits */
157 incl %edi /* add 1: if one carry bit was *not* set
158 the addition will not result in 0. */
160 /* If at least one byte of the word is CHR we don't get 0 in %edi. */
161 jnz L(8) /* found it => return pointer */
163 /* This process is unfolded four times for better performance.
164 we don't increment the source pointer each time. Instead we
165 use offsets and increment by 16 in each run of the loop. But
166 before probing for the matching byte we need some extra code
167 (following LL(13) below). Even the len can be compared with
168 constants instead of decrementing each time. */
170 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
171 movl $0xfefefeff, %edi /* magic value */
172 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
174 addl %ecx, %edi /* add the magic value to the word. We get
175 carry bits reported for each byte which
177 jnc L(7) /* highest byte is CHR => return pointer */
178 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
179 orl $0xfefefeff, %edi /* set all non-carry bits */
180 incl %edi /* add 1: if one carry bit was *not* set
181 the addition will not result in 0. */
182 jnz L(7) /* found it => return pointer */
184 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
185 movl $0xfefefeff, %edi /* magic value */
186 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
188 addl %ecx, %edi /* add the magic value to the word. We get
189 carry bits reported for each byte which
191 jnc L(6) /* highest byte is CHR => return pointer */
192 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
193 orl $0xfefefeff, %edi /* set all non-carry bits */
194 incl %edi /* add 1: if one carry bit was *not* set
195 the addition will not result in 0. */
196 jnz L(6) /* found it => return pointer */
198 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
199 movl $0xfefefeff, %edi /* magic value */
200 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
202 addl %ecx, %edi /* add the magic value to the word. We get
203 carry bits reported for each byte which
205 jnc L(5) /* highest byte is CHR => return pointer */
206 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
207 orl $0xfefefeff, %edi /* set all non-carry bits */
208 incl %edi /* add 1: if one carry bit was *not* set
209 the addition will not result in 0. */
210 jnz L(5) /* found it => return pointer */
212 /* Adjust both counters for a full round, i.e. 16 bytes. */
215 jae L(1) /* Still more than 16 bytes remaining */
217 /* Process remaining bytes separately. */
218 cmpl $4-16, %esi /* rest < 4 bytes? */
219 jb L(3) /* yes, than test byte by byte */
221 movl (%eax), %ecx /* get word (= 4 bytes) in question */
222 movl $0xfefefeff, %edi /* magic value */
223 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
225 addl %ecx, %edi /* add the magic value to the word. We get
226 carry bits reported for each byte which
228 jnc L(8) /* highest byte is CHR => return pointer */
229 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
230 orl $0xfefefeff, %edi /* set all non-carry bits */
231 incl %edi /* add 1: if one carry bit was *not* set
232 the addition will not result in 0. */
233 jne L(8) /* found it => return pointer */
234 addl $4, %eax /* adjust source pointer */
236 cmpl $8-16, %esi /* rest < 8 bytes? */
237 jb L(3) /* yes, than test byte by byte */
239 movl (%eax), %ecx /* get word (= 4 bytes) in question */
240 movl $0xfefefeff, %edi /* magic value */
241 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
243 addl %ecx, %edi /* add the magic value to the word. We get
244 carry bits reported for each byte which
246 jnc L(8) /* highest byte is CHR => return pointer */
247 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
248 orl $0xfefefeff, %edi /* set all non-carry bits */
249 incl %edi /* add 1: if one carry bit was *not* set
250 the addition will not result in 0. */
251 jne L(8) /* found it => return pointer */
252 addl $4, %eax /* adjust source pointer */
254 cmpl $12-16, %esi /* rest < 12 bytes? */
255 jb L(3) /* yes, than test byte by byte */
257 movl (%eax), %ecx /* get word (= 4 bytes) in question */
258 movl $0xfefefeff, %edi /* magic value */
259 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
261 addl %ecx, %edi /* add the magic value to the word. We get
262 carry bits reported for each byte which
264 jnc L(8) /* highest byte is CHR => return pointer */
265 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
266 orl $0xfefefeff, %edi /* set all non-carry bits */
267 incl %edi /* add 1: if one carry bit was *not* set
268 the addition will not result in 0. */
269 jne L(8) /* found it => return pointer */
270 addl $4, %eax /* adjust source pointer */
272 /* Check the remaining bytes one by one. */
273 L(3): andl $3, %esi /* mask out uninteresting bytes */
274 jz L(4) /* no remaining bytes => return NULL */
276 cmpb %dl, (%eax) /* compare byte with CHR */
277 je L(9) /* equal, than return pointer */
278 incl %eax /* increment source pointer */
279 decl %esi /* decrement length */
280 jz L(4) /* no remaining bytes => return NULL */
282 cmpb %dl, (%eax) /* compare byte with CHR */
283 je L(9) /* equal, than return pointer */
284 incl %eax /* increment source pointer */
285 decl %esi /* decrement length */
286 jz L(4) /* no remaining bytes => return NULL */
288 cmpb %dl, (%eax) /* compare byte with CHR */
289 je L(9) /* equal, than return pointer */
291 L(4): /* no byte found => return NULL */
295 /* add missing source pointer increments */
300 /* Test for the matching byte in the word. %ecx contains a NUL
301 char in the byte which originally was the byte we are looking
303 L(8): testb %cl, %cl /* test first byte in dword */
304 jz L(9) /* if zero => return pointer */
305 incl %eax /* increment source pointer */
307 testb %ch, %ch /* test second byte in dword */
308 jz L(9) /* if zero => return pointer */
309 incl %eax /* increment source pointer */
311 testl $0xff0000, %ecx /* test third byte in dword */
312 jz L(9) /* if zero => return pointer */
313 incl %eax /* increment source pointer */
315 /* No further test needed we we know it is one of the four bytes. */
317 #if __BOUNDED_POINTERS__
318 CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
319 /* If RTN pointer is phony, don't copy return value into it. */
323 RETURN_BOUNDED_POINTER (STR(%esp))
325 L(pop): popl %edi /* pop saved registers */
326 cfi_adjust_cfa_offset (-4)
329 cfi_adjust_cfa_offset (-4)
334 END (BP_SYM (__memchr))
336 weak_alias (BP_SYM (__memchr), BP_SYM (memchr))
337 #if !__BOUNDED_POINTERS__
338 weak_alias (__memchr, __ubp_memchr)
340 libc_hidden_builtin_def (memchr)