1 /* Find character CH in a NUL terminated string.
2 Highly optimized version for ix85, x>=5.
3 Copyright (C) 1995,1996,1997,2000,2003,2005 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, write to the Free
19 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 #include "asm-syntax.h"
27 /* This version is especially optimized for the i586 (and following?)
28 processors. This is mainly done by using the two pipelines. The
29 version optimized for i486 is weak in this aspect because to get
30 as much parallelism we have to execute some *more* instructions.
32 The code below is structured to reflect the pairing of the instructions
33 as *I think* it is. I have no processor data book to verify this.
34 If you find something you think is incorrect let me know. */
37 /* The magic value which is used throughout in the whole code. */
38 #define magic 0xfefefeff
40 #define PARMS LINKAGE+16 /* space for 4 saved regs */
42 #define STR RTN+RTN_SIZE
43 #define CHR STR+PTR_SIZE
46 ENTRY (BP_SYM (strchr))
49 pushl %edi /* Save callee-safe registers. */
50 cfi_adjust_cfa_offset (-4)
52 cfi_adjust_cfa_offset (-4)
55 cfi_adjust_cfa_offset (-4)
57 cfi_adjust_cfa_offset (-4)
61 CHECK_BOUNDS_LOW (%eax, STR(%esp))
63 movl %eax, %edi /* duplicate string pointer for later */
64 cfi_rel_offset (edi, 12)
65 xorl %ecx, %ecx /* clear %ecx */
67 /* At the moment %edx contains C. What we need for the
68 algorithm is C in all bytes of the dword. Avoid
69 operations on 16 bit words because these require an
70 prefix byte (and one more cycle). */
71 movb %dl, %dh /* now it is 0|0|c|c */
72 movb %dl, %cl /* we construct the lower half in %ecx */
74 shll $16, %edx /* now %edx is c|c|0|0 */
75 movb %cl, %ch /* now %ecx is 0|0|c|c */
77 orl %ecx, %edx /* and finally c|c|c|c */
78 andl $3, %edi /* mask alignment bits */
80 jz L(11) /* alignment is 0 => start loop */
82 movb %dl, %cl /* 0 is needed below */
83 jp L(0) /* exactly two bits set */
85 xorb (%eax), %cl /* is byte the one we are looking for? */
86 jz L(2) /* yes => return pointer */
88 xorb %dl, %cl /* load single byte and test for NUL */
89 je L(3) /* yes => return NULL */
91 movb 1(%eax), %cl /* load single byte */
94 cmpb %cl, %dl /* is byte == C? */
95 je L(2) /* aligned => return pointer */
97 cmpb $0, %cl /* is byte NUL? */
98 je L(3) /* yes => return NULL */
105 L(0): movb (%eax), %cl /* load single byte */
107 cmpb %cl, %dl /* is byte == C? */
108 je L(2) /* aligned => return pointer */
110 cmpb $0, %cl /* is byte NUL? */
111 je L(3) /* yes => return NULL */
113 incl %eax /* increment pointer */
115 cfi_rel_offset (esi, 8)
116 cfi_rel_offset (ebx, 4)
117 cfi_rel_offset (ebp, 0)
119 /* The following code is the preparation for the loop. The
120 four instruction up to `L1' will not be executed in the loop
121 because the same code is found at the end of the loop, but
122 there it is executed in parallel with other instructions. */
123 L(11): movl (%eax), %ecx
129 /* The main loop: it looks complex and indeed it is. I would
130 love to say `it was hard to write, so it should he hard to
131 read' but I will give some more hints. To fully understand
132 this code you should first take a look at the i486 version.
133 The basic algorithm is the same, but here the code organized
134 in a way which permits to use both pipelines all the time.
136 I tried to make it a bit more understandable by indenting
137 the code according to stage in the algorithm. It goes as
139 check for 0 in 1st word
140 check for C in 1st word
141 check for 0 in 2nd word
142 check for C in 2nd word
143 check for 0 in 3rd word
144 check for C in 3rd word
145 check for 0 in 4th word
146 check for C in 4th word
148 Please note that doing the test for NUL before the test for
149 C allows us to overlap the test for 0 in the next word with
152 L(1): xorl %ecx, %ebp /* (word^magic) */
153 addl %ecx, %edi /* add magic word */
155 leal 4(%eax), %eax /* increment pointer */
156 jnc L(4) /* previous addl caused overflow? */
158 movl %ecx, %ebx /* duplicate original word */
159 orl $magic, %ebp /* (word^magic)|magic */
161 addl $1, %ebp /* (word^magic)|magic == 0xffffffff? */
162 jne L(4) /* yes => we found word with NUL */
164 movl $magic, %esi /* load magic value */
165 xorl %edx, %ebx /* clear words which are C */
168 addl %ebx, %esi /* (word+magic) */
171 jnc L(5) /* previous addl caused overflow? */
174 xorl %ebx, %esi /* (word+magic)^word */
177 orl $magic, %esi /* ((word+magic)^word)|magic */
179 addl $1, %esi /* ((word+magic)^word)|magic==0xf..f?*/
180 jne L(5) /* yes => we found word with C */
273 /* We know there is no NUL byte but a C byte in the word.
274 %ebx contains NUL in this particular byte. */
275 L(5): subl $4, %eax /* adjust pointer */
276 testb %bl, %bl /* first byte == C? */
278 jz L(2) /* yes => return pointer */
280 incl %eax /* increment pointer */
281 testb %bh, %bh /* second byte == C? */
283 jz L(2) /* yes => return pointer */
285 shrl $16, %ebx /* make upper bytes accessible */
286 incl %eax /* increment pointer */
288 cmp $0, %bl /* third byte == C */
289 je L(2) /* yes => return pointer */
291 incl %eax /* increment pointer */
293 L(2): CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
294 RETURN_BOUNDED_POINTER (STR(%esp))
295 L(out): popl %ebp /* restore saved registers */
296 cfi_adjust_cfa_offset (-4)
299 cfi_adjust_cfa_offset (-4)
303 cfi_adjust_cfa_offset (-4)
306 cfi_adjust_cfa_offset (-4)
312 cfi_adjust_cfa_offset (16)
313 cfi_rel_offset (edi, 12)
314 cfi_rel_offset (esi, 8)
315 cfi_rel_offset (ebx, 4)
316 cfi_rel_offset (ebp, 0)
317 /* We know there is a NUL byte in the word. But we have to test
318 whether there is an C byte before it in the word. */
319 L(4): subl $4, %eax /* adjust pointer */
320 cmpb %dl, %cl /* first byte == C? */
322 je L(2) /* yes => return pointer */
324 cmpb $0, %cl /* first byte == NUL? */
325 je L(3) /* yes => return NULL */
327 incl %eax /* increment pointer */
329 cmpb %dl, %ch /* second byte == C? */
330 je L(2) /* yes => return pointer */
332 cmpb $0, %ch /* second byte == NUL? */
333 je L(3) /* yes => return NULL */
335 shrl $16, %ecx /* make upper bytes accessible */
336 incl %eax /* increment pointer */
338 cmpb %dl, %cl /* third byte == C? */
339 je L(2) /* yes => return pointer */
341 cmpb $0, %cl /* third byte == NUL? */
342 je L(3) /* yes => return NULL */
344 incl %eax /* increment pointer */
346 /* The test four the fourth byte is necessary! */
347 cmpb %dl, %ch /* fourth byte == C? */
348 je L(2) /* yes => return pointer */
350 L(3): xorl %eax, %eax
351 RETURN_NULL_BOUNDED_POINTER
353 END (BP_SYM (strchr))
356 weak_alias (BP_SYM (strchr), BP_SYM (index))
357 libc_hidden_builtin_def (strchr)