1 /* strlen -- Compute length of NUL terminated string.
2 Highly optimized version for ix86, x>=5.
3 Copyright (C) 1995-2015 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, see
19 <http://www.gnu.org/licenses/>. */
22 #include "asm-syntax.h"
24 /* This version is especially optimized for the i586 (and following?)
25 processors. This is mainly done by using the two pipelines. The
26 version optimized for i486 is weak in this aspect because to get
27 as much parallelism we have to execute some *more* instructions.
29 The code below is structured to reflect the pairing of the instructions
30 as *I think* it is. I have no processor data book to verify this.
31 If you find something you think is incorrect let me know. */
34 /* Addend for the word-at-a-time NUL test used throughout this file.
   0xfefefeff has every bit set except bits 8, 16 and 24; its complement,
   0x01010100, is the mask applied to the result after the addition. */
35 #define magic 0xfefefeff
37 #define PARMS 4 /* no space for saved regs */
/* NOTE(review): the visible instructions address the argument as
   STR(%esp), not PARMS(%esp), and no definition of STR is visible in this
   text -- presumably STR is derived from PARMS; confirm. */
/* strlen body: byte-wise steps up to a word boundary, then a four-times
   unrolled 32-bit word scan, then a byte-wise search inside the flagged
   word.

   Register roles, as used by the visible instructions:
     %eax  cursor into the string; the result is %eax minus the pointer
           stored at STR(%esp), computed at L(2).
     %edx  first the low two address bits, afterwards scratch for the word
           test; it is 0 whenever a word-test step begins.
     %ecx  the 32-bit word just loaded, then word + magic.

   NOTE(review): the visible text shows neither the instruction that loads
   %eax with the argument, nor a definition of label L(1), nor a return
   after L(2).  L(1) is presumably the top of the unrolled word loop
   below -- confirm against the complete file. */
44 movl $3, %edx /* mask for the low two address bits */
46 andl %eax, %edx /* %edx = %eax & 3; ZF and PF feed the two branches below */
48 jz L(1) /* low bits 00: already 4-byte aligned => start word loop */
49 jp L(0) /* even parity: of 1, 2, 3 only 3 has two bits set => one byte step */
/* Low bits are 01 or 10 here.  %edx <= 3, so %dh is zero and serves as
   the NUL comparand in the byte tests. */
51 cmpb %dh, (%eax) /* is byte NUL? */
52 je L(2) /* yes => return */
54 incl %eax /* increment pointer */
55 cmpb %dh, (%eax) /* is byte NUL? */
57 je L(2) /* yes => return */
59 incl %eax /* increment pointer */
/* NOTE(review): with low bits 10 the two steps above already reach a
   4-byte boundary, yet as visible here execution continues into L(0) and
   takes a third byte step.  Confirm whether an early branch to the word
   loop belongs at this point. */
/* L(0): final single-byte step before the word loop. */
64 L(0): cmpb %dh, (%eax) /* is byte NUL? */
65 je L(2) /* yes => return */
67 incl %eax /* increment pointer */
68 xorl %edx, %edx /* the word test below requires %edx == 0 on entry */
70 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
71 change any of the hole bits of LONGWORD.
73 1) Is this safe? Will it catch all the zero bytes?
74 Suppose there is a byte with all zeros. Any carry bits
75 propagating from its left will fall into the hole at its
76 least significant bit and stop. Since there will be no
77 carry from its most significant bit, the LSB of the
78 byte to the left will be unchanged, and the zero will be detected.
81 2) Is this worthwhile? Will it ignore everything except
82 zero bytes? Suppose every byte of LONGWORD has a bit set
83 somewhere. There will be a carry into bit 8. If bit 8
84 is set, this will carry into bit 16. If bit 8 is clear,
85 one of bits 9-15 must be set, so there will be a carry
86 into bit 16. Similarly, there will be a carry into bit
87 24. If one of bits 24-31 is set, there will be a carry
88 into bit 32 (=carry flag), so all of the hole bits will be changed.
91 Note: %edx == 0 in any case here. */
/* Word test, repeated four times.  Each step does:
     %ecx = word at (%eax); %eax += 4
     %edx = 0 - word - 1 = ~word           (subl, then decl)
     %ecx = word + magic                   (addl; CF = carry out of bit 31)
     no carry                              -> L(3)
     (~word ^ (word + magic)) & ~magic != 0 -> L(3)
   ~magic is 0x01010100, i.e. bits 8, 16 and 24.  A 1 bit in
   ~word ^ (word + magic) marks a position the addition left unchanged, so
   a non-zero masked result means some hole bit did not change.  When the
   masked result is zero, %edx is 0 again, as the next step requires. */
/* Step 1 of 4. */
94 movl (%eax), %ecx /* get word (= 4 bytes) in question */
95 addl $4, %eax /* adjust pointer for *next* word */
97 subl %ecx, %edx /* %edx = -word (first step to negate word) */
98 addl $magic, %ecx /* %ecx = word + magic; sets CF */
100 decl %edx /* %edx = ~word; decl leaves CF as set by addl */
101 jnc L(3) /* no carry out of bit 31 => word contains a zero byte */
103 xorl %ecx, %edx /* %edx = ~word ^ (word + magic): 1 = bit unchanged */
105 andl $~magic, %edx /* keep only the hole bits 8, 16, 24 */
107 jne L(3) /* some hole bit unchanged => determine byte */
/* Step 2 of 4 (same sequence). */
110 movl (%eax), %ecx /* get word (= 4 bytes) in question */
111 addl $4, %eax /* adjust pointer for *next* word */
113 subl %ecx, %edx /* %edx = -word (first step to negate word) */
114 addl $magic, %ecx /* %ecx = word + magic; sets CF */
116 decl %edx /* %edx = ~word; decl leaves CF as set by addl */
117 jnc L(3) /* no carry out of bit 31 => word contains a zero byte */
119 xorl %ecx, %edx /* %edx = ~word ^ (word + magic): 1 = bit unchanged */
121 andl $~magic, %edx /* keep only the hole bits 8, 16, 24 */
123 jne L(3) /* some hole bit unchanged => determine byte */
/* Step 3 of 4 (same sequence). */
126 movl (%eax), %ecx /* get word (= 4 bytes) in question */
127 addl $4, %eax /* adjust pointer for *next* word */
129 subl %ecx, %edx /* %edx = -word (first step to negate word) */
130 addl $magic, %ecx /* %ecx = word + magic; sets CF */
132 decl %edx /* %edx = ~word; decl leaves CF as set by addl */
133 jnc L(3) /* no carry out of bit 31 => word contains a zero byte */
135 xorl %ecx, %edx /* %edx = ~word ^ (word + magic): 1 = bit unchanged */
137 andl $~magic, %edx /* keep only the hole bits 8, 16, 24 */
139 jne L(3) /* some hole bit unchanged => determine byte */
/* Step 4 of 4; its final branch closes the loop instead of exiting. */
142 movl (%eax), %ecx /* get word (= 4 bytes) in question */
143 addl $4, %eax /* adjust pointer for *next* word */
145 subl %ecx, %edx /* %edx = -word (first step to negate word) */
146 addl $magic, %ecx /* %ecx = word + magic; sets CF */
148 decl %edx /* %edx = ~word; decl leaves CF as set by addl */
149 jnc L(3) /* no carry out of bit 31 => word contains a zero byte */
151 xorl %ecx, %edx /* %edx = ~word ^ (word + magic): 1 = bit unchanged */
153 andl $~magic, %edx /* keep only the hole bits 8, 16, 24 */
155 je L(1) /* all hole bits changed => start loop again (%edx == 0) */
/* L(3): a step flagged the current word.  %eax is rewound to the word's
   first byte and the bytes are tested in address order (little-endian:
   %cl, %ch, then the upper half after the shift).  If none of the three
   tests hits, %eax is advanced to the fourth byte.
   NOTE(review): the last visible write to %ecx is "addl $magic", so as
   shown these tests see word + magic rather than the loaded word.  Confirm
   that %ecx is restored (e.g. by subtracting magic) before the byte
   tests. */
158 L(3): subl $4, %eax /* correct too early pointer increment */
161 cmpb $0, %cl /* lowest byte NUL? */
162 jz L(2) /* yes => return */
164 inc %eax /* increment pointer */
165 testb %ch, %ch /* second byte NUL? */
167 jz L(2) /* yes => return */
169 shrl $16, %ecx /* make upper bytes accessible */
170 incl %eax /* increment pointer */
172 cmpb $0, %cl /* is third byte NUL? */
173 jz L(2) /* yes => return */
175 incl %eax /* none of the first three => point at the fourth byte */
/* L(2): on arrival %eax is meant to address the terminating NUL; the
   length is %eax minus the start pointer held at STR(%esp).
   NOTE(review): the comment opened on the following line has no closing
   delimiter in the visible text -- confirm against the complete file. */
177 L(2): subl STR(%esp), %eax /* now compute the length as difference
178 between start and terminating NUL
182 libc_hidden_builtin_def (strlen)