1 /* strchrnul (str, chr) -- Return pointer to first occurrence of CHR in STR
4 Copyright (C) 1994-1997, 1999, 2000, 2005 Free Software Foundation, Inc.
5 This file is part of the GNU C Library.
6 Contributed by Ulrich Drepper <drepper@gnu.org>
7 Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
9 The GNU C Library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
14 The GNU C Library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
19 You should have received a copy of the GNU Lesser General Public
20 License along with the GNU C Library; if not, write to the Free
21 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 #include "asm-syntax.h"
29 #define PARMS LINKAGE+4 /* space for 1 saved reg */
31 #define STR RTN+RTN_SIZE
32 #define CHR STR+PTR_SIZE
35 ENTRY (BP_SYM (__strchrnul))
38 pushl %edi /* Save callee-safe registers used here. */
39 cfi_adjust_cfa_offset (4)
40 cfi_rel_offset (edi, 0)
44 CHECK_BOUNDS_LOW (%eax, STR(%esp))
46 /* At the moment %edx contains CHR. What we need for the
47 algorithm is CHR in all bytes of the dword. Avoid
48 operations on 16 bit words because these require an
49 prefix byte (and one more cycle). */
50 movb %dl, %dh /* now it is 0|0|c|c */
52 shll $16, %edx /* now it is c|c|0|0 */
53 movw %cx, %dx /* and finally c|c|c|c */
55 /* Before we start with the main loop we process single bytes
56 until the source pointer is aligned. This has two reasons:
57 1. aligned 32-bit memory access is faster
59 2. we process in the main loop 32 bit in one step although
60 we don't know the end of the string. But accessing at
61 4-byte alignment guarantees that we never access illegal
62 memory if this would not also be done by the trivial
63 implementation (this is because all processor inherent
64 boundaries are multiples of 4. */
66 testb $3, %al /* correctly aligned ? */
67 jz L(11) /* yes => begin loop */
68 movb (%eax), %cl /* load byte in question (we need it twice) */
69 cmpb %cl, %dl /* compare byte */
70 je L(6) /* target found => return */
71 testb %cl, %cl /* is NUL? */
72 jz L(6) /* yes => return NULL */
73 incl %eax /* increment pointer */
75 testb $3, %al /* correctly aligned ? */
76 jz L(11) /* yes => begin loop */
77 movb (%eax), %cl /* load byte in question (we need it twice) */
78 cmpb %cl, %dl /* compare byte */
79 je L(6) /* target found => return */
80 testb %cl, %cl /* is NUL? */
81 jz L(6) /* yes => return NULL */
82 incl %eax /* increment pointer */
84 testb $3, %al /* correctly aligned ? */
85 jz L(11) /* yes => begin loop */
86 movb (%eax), %cl /* load byte in question (we need it twice) */
87 cmpb %cl, %dl /* compare byte */
88 je L(6) /* target found => return */
89 testb %cl, %cl /* is NUL? */
90 jz L(6) /* yes => return NULL */
91 incl %eax /* increment pointer */
93 /* No we have reached alignment. */
94 jmp L(11) /* begin loop */
96 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
97 change any of the hole bits of LONGWORD.
99 1) Is this safe? Will it catch all the zero bytes?
100 Suppose there is a byte with all zeros. Any carry bits
101 propagating from its left will fall into the hole at its
102 least significant bit and stop. Since there will be no
103 carry from its most significant bit, the LSB of the
104 byte to the left will be unchanged, and the zero will be
107 2) Is this worthwhile? Will it ignore everything except
108 zero bytes? Suppose every byte of LONGWORD has a bit set
109 somewhere. There will be a carry into bit 8. If bit 8
110 is set, this will carry into bit 16. If bit 8 is clear,
111 one of bits 9-15 must be set, so there will be a carry
112 into bit 16. Similarly, there will be a carry into bit
113 24. If one of bits 24-31 is set, there will be a carry
114 into bit 32 (=carry flag), so all of the hole bits will
117 3) But wait! Aren't we looking for CHR, not zero?
118 Good point. So what we do is XOR LONGWORD with a longword,
119 each of whose bytes is CHR. This turns each byte that is CHR
122 /* Each round the main loop processes 16 bytes. */
126 L(1): addl $16, %eax /* adjust pointer for whole round */
128 L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
129 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
131 movl $0xfefefeff, %edi /* magic value */
132 addl %ecx, %edi /* add the magic value to the word. We get
133 carry bits reported for each byte which
136 /* According to the algorithm we had to reverse the effect of the
137 XOR first and then test the overflow bits. But because the
138 following XOR would destroy the carry flag and it would (in a
139 representation with more than 32 bits) not alter then last
140 overflow, we can now test this condition. If no carry is signaled
141 no overflow must have occurred in the last byte => it was 0. */
144 /* We are only interested in carry bits that change due to the
145 previous add, so remove original bits */
146 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
148 /* Now test for the other three overflow bits. */
149 orl $0xfefefeff, %edi /* set all non-carry bits */
150 incl %edi /* add 1: if one carry bit was *not* set
151 the addition will not result in 0. */
153 /* If at least one byte of the word is CHR we don't get 0 in %edi. */
154 jnz L(7) /* found it => return pointer */
156 /* Now we made sure the dword does not contain the character we are
157 looking for. But because we deal with strings we have to check
158 for the end of string before testing the next dword. */
160 xorl %edx, %ecx /* restore original dword without reload */
161 movl $0xfefefeff, %edi /* magic value */
162 addl %ecx, %edi /* add the magic value to the word. We get
163 carry bits reported for each byte which
165 jnc L(7) /* highest byte is NUL => return NULL */
166 xorl %ecx, %edi /* (word+magic)^word */
167 orl $0xfefefeff, %edi /* set all non-carry bits */
168 incl %edi /* add 1: if one carry bit was *not* set
169 the addition will not result in 0. */
170 jnz L(7) /* found NUL => return NULL */
172 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
173 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
175 movl $0xfefefeff, %edi /* magic value */
176 addl %ecx, %edi /* add the magic value to the word. We get
177 carry bits reported for each byte which
179 jnc L(71) /* highest byte is CHR => return pointer */
180 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
181 orl $0xfefefeff, %edi /* set all non-carry bits */
182 incl %edi /* add 1: if one carry bit was *not* set
183 the addition will not result in 0. */
184 jnz L(71) /* found it => return pointer */
185 xorl %edx, %ecx /* restore original dword without reload */
186 movl $0xfefefeff, %edi /* magic value */
187 addl %ecx, %edi /* add the magic value to the word. We get
188 carry bits reported for each byte which
190 jnc L(71) /* highest byte is NUL => return NULL */
191 xorl %ecx, %edi /* (word+magic)^word */
192 orl $0xfefefeff, %edi /* set all non-carry bits */
193 incl %edi /* add 1: if one carry bit was *not* set
194 the addition will not result in 0. */
195 jnz L(71) /* found NUL => return NULL */
197 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
198 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
200 movl $0xfefefeff, %edi /* magic value */
201 addl %ecx, %edi /* add the magic value to the word. We get
202 carry bits reported for each byte which
204 jnc L(72) /* highest byte is CHR => return pointer */
205 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
206 orl $0xfefefeff, %edi /* set all non-carry bits */
207 incl %edi /* add 1: if one carry bit was *not* set
208 the addition will not result in 0. */
209 jnz L(72) /* found it => return pointer */
210 xorl %edx, %ecx /* restore original dword without reload */
211 movl $0xfefefeff, %edi /* magic value */
212 addl %ecx, %edi /* add the magic value to the word. We get
213 carry bits reported for each byte which
215 jnc L(72) /* highest byte is NUL => return NULL */
216 xorl %ecx, %edi /* (word+magic)^word */
217 orl $0xfefefeff, %edi /* set all non-carry bits */
218 incl %edi /* add 1: if one carry bit was *not* set
219 the addition will not result in 0. */
220 jnz L(72) /* found NUL => return NULL */
222 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
223 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
225 movl $0xfefefeff, %edi /* magic value */
226 addl %ecx, %edi /* add the magic value to the word. We get
227 carry bits reported for each byte which
229 jnc L(73) /* highest byte is CHR => return pointer */
230 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
231 orl $0xfefefeff, %edi /* set all non-carry bits */
232 incl %edi /* add 1: if one carry bit was *not* set
233 the addition will not result in 0. */
234 jnz L(73) /* found it => return pointer */
235 xorl %edx, %ecx /* restore original dword without reload */
236 movl $0xfefefeff, %edi /* magic value */
237 addl %ecx, %edi /* add the magic value to the word. We get
238 carry bits reported for each byte which
240 jnc L(73) /* highest byte is NUL => return NULL */
241 xorl %ecx, %edi /* (word+magic)^word */
242 orl $0xfefefeff, %edi /* set all non-carry bits */
243 incl %edi /* add 1: if one carry bit was *not* set
244 the addition will not result in 0. */
245 jz L(1) /* no NUL found => restart loop */
247 L(73): addl $4, %eax /* adjust pointer */
251 /* We now scan for the byte in which the character was matched.
252 But we have to take care of the case that a NUL char is
253 found before this in the dword. */
255 L(7): testb %cl, %cl /* is first byte CHR? */
256 jz L(6) /* yes => return pointer */
257 cmpb %dl, %cl /* is first byte NUL? */
258 je L(6) /* yes => return NULL */
259 incl %eax /* it's not in the first byte */
261 testb %ch, %ch /* is second byte CHR? */
262 jz L(6) /* yes => return pointer */
263 cmpb %dl, %ch /* is second byte NUL? */
264 je L(6) /* yes => return NULL? */
265 incl %eax /* it's not in the second byte */
267 shrl $16, %ecx /* make upper byte accessible */
268 testb %cl, %cl /* is third byte CHR? */
269 jz L(6) /* yes => return pointer */
270 cmpb %dl, %cl /* is third byte NUL? */
271 je L(6) /* yes => return NULL */
273 /* It must be in the fourth byte and it cannot be NUL. */
276 L(6): CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
277 RETURN_BOUNDED_POINTER (STR(%esp))
278 popl %edi /* restore saved register content */
279 cfi_adjust_cfa_offset (-4)
284 END (BP_SYM (__strchrnul))
286 weak_alias (BP_SYM (__strchrnul), BP_SYM (strchrnul))