1 /* rawmemchr (str, ch) -- Return pointer to first occurrence of CH in STR.
3 Copyright (C) 1994-2000,2002,2005 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
6 Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
7 This version is developed using the same algorithm as the fast C
8 version which carries the following introduction:
9 Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
10 with help from Dan Sahlin (dan@sics.se) and
11 commentary by Jim Blandy (jimb@ai.mit.edu);
12 adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
13 and implemented by Roland McGrath (roland@ai.mit.edu).
15 The GNU C Library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 2.1 of the License, or (at your option) any later version.
20 The GNU C Library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with the GNU C Library; if not, write to the Free
27 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
31 #include "asm-syntax.h"
35 #define PARMS LINKAGE+4 /* space for 1 saved reg */
37 #define STR RTN+RTN_SIZE
38 #define CHR STR+PTR_SIZE
41 ENTRY (BP_SYM (__rawmemchr))
44 /* Save callee-safe register used in this function. */
46 cfi_adjust_cfa_offset (4)
47 cfi_rel_offset (edi, 0)
49 /* Load parameters into registers. */
52 CHECK_BOUNDS_LOW (%eax, STR(%esp))
54 /* At the moment %edx contains C. What we need for the
55 algorithm is C in all bytes of the dword. Avoid
56 operations on 16 bit words because these require an
57 prefix byte (and one more cycle). */
58 movb %dl, %dh /* Now it is 0|0|c|c */
60 shll $16, %edx /* Now c|c|0|0 */
61 movw %cx, %dx /* And finally c|c|c|c */
63 /* Better performance can be achieved if the word (32
64 bit) memory access is aligned on a four-byte-boundary.
65 So process first bytes one by one until boundary is
66 reached. Don't use a loop for better performance. */
68 testb $3, %al /* correctly aligned ? */
69 je L(1) /* yes => begin loop */
70 cmpb %dl, (%eax) /* compare byte */
71 je L(9) /* target found => return */
72 incl %eax /* increment source pointer */
74 testb $3, %al /* correctly aligned ? */
75 je L(1) /* yes => begin loop */
76 cmpb %dl, (%eax) /* compare byte */
77 je L(9) /* target found => return */
78 incl %eax /* increment source pointer */
80 testb $3, %al /* correctly aligned ? */
81 je L(1) /* yes => begin loop */
82 cmpb %dl, (%eax) /* compare byte */
83 je L(9) /* target found => return */
84 incl %eax /* increment source pointer */
86 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
87 change any of the hole bits of LONGWORD.
89 1) Is this safe? Will it catch all the zero bytes?
90 Suppose there is a byte with all zeros. Any carry bits
91 propagating from its left will fall into the hole at its
92 least significant bit and stop. Since there will be no
93 carry from its most significant bit, the LSB of the
94 byte to the left will be unchanged, and the zero will be
97 2) Is this worthwhile? Will it ignore everything except
98 zero bytes? Suppose every byte of LONGWORD has a bit set
99 somewhere. There will be a carry into bit 8. If bit 8
100 is set, this will carry into bit 16. If bit 8 is clear,
101 one of bits 9-15 must be set, so there will be a carry
102 into bit 16. Similarly, there will be a carry into bit
103 24. If one of bits 24-31 is set, there will be a carry
104 into bit 32 (=carry flag), so all of the hole bits will
107 3) But wait! Aren't we looking for C, not zero?
108 Good point. So what we do is XOR LONGWORD with a longword,
109 each of whose bytes is C. This turns each byte that is C
113 /* Each round the main loop processes 16 bytes. */
116 L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
117 movl $0xfefefeff, %edi /* magic value */
118 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
120 addl %ecx, %edi /* add the magic value to the word. We get
121 carry bits reported for each byte which
124 /* According to the algorithm we had to reverse the effect of the
125 XOR first and then test the overflow bits. But because the
126 following XOR would destroy the carry flag and it would (in a
127 representation with more than 32 bits) not alter then last
128 overflow, we can now test this condition. If no carry is signaled
129 no overflow must have occurred in the last byte => it was 0. */
132 /* We are only interested in carry bits that change due to the
133 previous add, so remove original bits */
134 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
136 /* Now test for the other three overflow bits. */
137 orl $0xfefefeff, %edi /* set all non-carry bits */
138 incl %edi /* add 1: if one carry bit was *not* set
139 the addition will not result in 0. */
141 /* If at least one byte of the word is C we don't get 0 in %edi. */
142 jnz L(8) /* found it => return pointer */
144 /* This process is unfolded four times for better performance.
145 we don't increment the source pointer each time. Instead we
146 use offsets and increment by 16 in each run of the loop. But
147 before probing for the matching byte we need some extra code
148 (following LL(13) below). Even the len can be compared with
149 constants instead of decrementing each time. */
151 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
152 movl $0xfefefeff, %edi /* magic value */
153 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
155 addl %ecx, %edi /* add the magic value to the word. We get
156 carry bits reported for each byte which
158 jnc L(7) /* highest byte is C => return pointer */
159 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
160 orl $0xfefefeff, %edi /* set all non-carry bits */
161 incl %edi /* add 1: if one carry bit was *not* set
162 the addition will not result in 0. */
163 jnz L(7) /* found it => return pointer */
165 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
166 movl $0xfefefeff, %edi /* magic value */
167 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
169 addl %ecx, %edi /* add the magic value to the word. We get
170 carry bits reported for each byte which
172 jnc L(6) /* highest byte is C => return pointer */
173 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
174 orl $0xfefefeff, %edi /* set all non-carry bits */
175 incl %edi /* add 1: if one carry bit was *not* set
176 the addition will not result in 0. */
177 jnz L(6) /* found it => return pointer */
179 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
180 movl $0xfefefeff, %edi /* magic value */
181 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
183 addl %ecx, %edi /* add the magic value to the word. We get
184 carry bits reported for each byte which
186 jnc L(5) /* highest byte is C => return pointer */
187 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
188 orl $0xfefefeff, %edi /* set all non-carry bits */
189 incl %edi /* add 1: if one carry bit was *not* set
190 the addition will not result in 0. */
191 jnz L(5) /* found it => return pointer */
193 /* Adjust both counters for a full round, i.e. 16 bytes. */
196 /* add missing source pointer increments */
201 /* Test for the matching byte in the word. %ecx contains a NUL
202 char in the byte which originally was the byte we are looking
204 L(8): testb %cl, %cl /* test first byte in dword */
205 jz L(9) /* if zero => return pointer */
206 incl %eax /* increment source pointer */
208 testb %ch, %ch /* test second byte in dword */
209 jz L(9) /* if zero => return pointer */
210 incl %eax /* increment source pointer */
212 testl $0xff0000, %ecx /* test third byte in dword */
213 jz L(9) /* if zero => return pointer */
214 incl %eax /* increment source pointer */
216 /* No further test needed we we know it is one of the four bytes. */
219 CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
220 RETURN_BOUNDED_POINTER (STR(%esp))
221 popl %edi /* pop saved register */
222 cfi_adjust_cfa_offset (-4)
227 END (BP_SYM (__rawmemchr))
229 libc_hidden_def (BP_SYM (__rawmemchr))
230 weak_alias (BP_SYM (__rawmemchr), BP_SYM (rawmemchr))