1 /* strlen -- Compute length of NUL terminated string.
2 Highly optimized version for ix86, x>=5.
3 Copyright (C) 1995,1996,1997,2000,2002,2003 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, write to the Free
19 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 #include "asm-syntax.h"
27 /* This version is especially optimized for the i586 (and following?)
28 processors. This is mainly done by using the two pipelines. The
29 version optimized for i486 is weak in this aspect because to get
30 as much parallelism we have to execute some *more* instructions.
32 The code below is structured to reflect the pairing of the instructions
33 as *I think* it is. I have no processor data book to verify this.
34 If you find something you think is incorrect let me know. */
37 /* The magic value which is used throughout in the whole code. */
38 #define magic 0xfefefeff
40 #define PARMS LINKAGE /* no space for saved regs */
44 ENTRY (BP_SYM (strlen))
48 CHECK_BOUNDS_LOW (%eax, STR(%esp))
49 movl $3, %edx /* load mask (= 3) */
51 andl %eax, %edx /* separate last two bits of address */
53 jz L(1) /* aligned => start loop */
54 jp L(0) /* exactly two bits set */
56 cmpb %dh, (%eax) /* is byte NUL? */
57 je L(2) /* yes => return */
59 incl %eax /* increment pointer */
60 cmpb %dh, (%eax) /* is byte NUL? */
62 je L(2) /* yes => return */
64 incl %eax /* increment pointer */
69 L(0): cmpb %dh, (%eax) /* is byte NUL? */
70 je L(2) /* yes => return */
72 incl %eax /* increment pointer */
73 xorl %edx, %edx /* We need %edx == 0 for later */
75 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
76 change any of the hole bits of LONGWORD.
78 1) Is this safe? Will it catch all the zero bytes?
79 Suppose there is a byte with all zeros. Any carry bits
80 propagating from its left will fall into the hole at its
81 least significant bit and stop. Since there will be no
82 carry from its most significant bit, the LSB of the
83 byte to the left will be unchanged, and the zero will be
86 2) Is this worthwhile? Will it ignore everything except
87 zero bytes? Suppose every byte of LONGWORD has a bit set
88 somewhere. There will be a carry into bit 8. If bit 8
89 is set, this will carry into bit 16. If bit 8 is clear,
90 one of bits 9-15 must be set, so there will be a carry
91 into bit 16. Similarly, there will be a carry into bit
92 24. If one of bits 24-31 is set, there will be a carry
93 into bit 32 (=carry flag), so all of the hole bits will
96 Note: %edx == 0 in any case here. */
99 movl (%eax), %ecx /* get word (= 4 bytes) in question */
100 addl $4, %eax /* adjust pointer for *next* word */
102 subl %ecx, %edx /* first step to negate word */
103 addl $magic, %ecx /* add magic word */
105 decl %edx /* complete negation of word */
106 jnc L(3) /* previous addl caused overflow? */
108 xorl %ecx, %edx /* (word+magic)^word */
110 andl $~magic, %edx /* any of the carry flags set? */
112 jne L(3) /* yes => determine byte */
115 movl (%eax), %ecx /* get word (= 4 bytes) in question */
116 addl $4, %eax /* adjust pointer for *next* word */
118 subl %ecx, %edx /* first step to negate word */
119 addl $magic, %ecx /* add magic word */
121 decl %edx /* complete negation of word */
122 jnc L(3) /* previous addl caused overflow? */
124 xorl %ecx, %edx /* (word+magic)^word */
126 andl $~magic, %edx /* any of the carry flags set? */
128 jne L(3) /* yes => determine byte */
131 movl (%eax), %ecx /* get word (= 4 bytes) in question */
132 addl $4, %eax /* adjust pointer for *next* word */
134 subl %ecx, %edx /* first step to negate word */
135 addl $magic, %ecx /* add magic word */
137 decl %edx /* complete negation of word */
138 jnc L(3) /* previous addl caused overflow? */
140 xorl %ecx, %edx /* (word+magic)^word */
142 andl $~magic, %edx /* any of the carry flags set? */
144 jne L(3) /* yes => determine byte */
147 movl (%eax), %ecx /* get word (= 4 bytes) in question */
148 addl $4, %eax /* adjust pointer for *next* word */
150 subl %ecx, %edx /* first step to negate word */
151 addl $magic, %ecx /* add magic word */
153 decl %edx /* complete negation of word */
154 jnc L(3) /* previous addl caused overflow? */
156 xorl %ecx, %edx /* (word+magic)^word */
158 andl $~magic, %edx /* any of the carry flags set? */
160 je L(1) /* no => start loop again */
163 L(3): subl $4, %eax /* correct too early pointer increment */
166 cmpb $0, %cl /* lowest byte NUL? */
167 jz L(2) /* yes => return */
169 inc %eax /* increment pointer */
170 testb %ch, %ch /* second byte NUL? */
172 jz L(2) /* yes => return */
174 shrl $16, %ecx /* make upper bytes accessible */
175 incl %eax /* increment pointer */
177 cmpb $0, %cl /* is third byte NUL? */
178 jz L(2) /* yes => return */
180 incl %eax /* increment pointer */
182 L(2): CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
183 subl STR(%esp), %eax /* now compute the length as difference
184 between start and terminating NUL
188 END (BP_SYM (strlen))
189 libc_hidden_builtin_def (strlen)