1 /* Compare two strings for differences.
3 Copyright (C) 1997, 1999, 2003 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
6 Jakub Jelinek <jj@ultra.linux.cz>.
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
/* Declare %g2, %g3 and %g6 to the assembler as scratch registers.
   All three are written by the string-compare code in this file.  */
24 .register %g2, #scratch
25 .register %g3, #scratch
26 .register %g6, #scratch
29 /* Normally, this uses
30 ((xword - 0x0101010101010101) & 0x8080808080808080) test
31 to find out if any byte in xword could be zero. This is fast, but
32 also gives a false alarm for any byte in the range 0x81-0xff. It does
33 not matter for correctness, as if this test tells us there could
34 be some zero byte, we check it byte by byte, but if bytes with
35 high bits set are common in the strings, then this will give poor
36 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
37 will use one tick slower, but more precise test
38 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
39 which does not give any false alarms (but if some bits are set,
40 one cannot assume from it which bytes are zero and which are not).
41 It has yet to be measured which default is the right one for glibc
42 these days for an average user.
/* strcmp body.  Register roles as read from the code below:
     %o0  pointer into the first string; also the result register
     %o1  pointer into the second string, later converted into an
          offset relative to %o0
     %o2  current byte / 8-byte word of the first string
     %o3  current byte / 8-byte word of the second string
     %g1  0x0101010101010101 once fully built
     %g2  0x8080808080808080 once fully built
   Strategy: compare bytewise until %o0 is 8-byte aligned (label 7),
   then compare 8 bytes per iteration (label 2).  If the second string
   is not 8-byte aligned at that point, its words are assembled from
   two aligned loads with shifts (labels 9-12).
   Unless noted, the instruction after a branch is its delay slot.
   NOTE(review): ASI_PNF is defined outside this view; presumably it is
   the non-faulting primary ASI that makes the read-ahead loads safe -
   confirm.
   NOTE(review): no else/endif lines for the EIGHTBIT_NOT_RARE
   conditionals are visible in this view, and some delay slots (for
   example after the retl at label 4) show no instruction - confirm
   against the complete file.  */
/* Start building the 0x01 pattern and test the alignment of %o0.
   A misaligned first string goes to the bytewise loop at 7; the
   delay slot completes %g1 = 0x01010101 on either path.  */
48 sethi %hi(0x01010101), %g1 /* IEU0 Group */
49 andcc %o0, 7, %g0 /* IEU1 */
50 bne,pn %icc, 7f /* CTI */
51 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
/* %g3 = %o1 & 7.  A misaligned second string goes to 9; the delay
   slot leaves %g2 = %g1 << 32 so that 9 can finish the constant.  */
53 andcc %o1, 7, %g3 /* IEU1 */
54 bne,pn %icc, 9f /* CTI */
55 sllx %g1, 32, %g2 /* IEU0 Group */
56 ldx [%o0], %o2 /* Load */
/* Both pointers aligned: %g1 = 0x0101010101010101.  */
58 or %g1, %g2, %g1 /* IEU0 Group */
/* 1: load the first word of the second string, turn %o1 into the
   offset (%o1 - %o0) and form %g2 = %g1 << 7 = 0x8080808080808080.  */
59 1: ldx [%o1], %o3 /* Load */
60 sub %o1, %o0, %o1 /* IEU1 */
61 sllx %g1, 7, %g2 /* IEU0 Group */
/* 2: aligned main loop.  %o2/%o3 hold the current 8 bytes of each
   string.  %g3 = %o2 - %g1 carries the possible-zero-byte bits.  Any
   difference between the two words is resolved at 13.  */
63 2: add %o0, 8, %o0 /* IEU1 */
64 sub %o2, %g1, %g3 /* IEU0 Group */
65 subcc %o2, %o3, %g0 /* IEU1 */
66 bne,pn %xcc, 13f /* CTI */
/* Precise variant: %g4 = %g3 & ~%o2 is tested against %g2.  The
   other variant tests %g3 directly.  Both overwrite %o2 with the
   next 8 bytes of the first string.  */
68 #ifdef EIGHTBIT_NOT_RARE
69 andn %g3, %o2, %g4 /* IEU0 Group */
70 ldxa [%o0] ASI_PNF, %o2 /* Load */
71 andcc %g4, %g2, %g0 /* IEU1 Group */
73 ldxa [%o0] ASI_PNF, %o2 /* Load Group */
74 andcc %g3, %g2, %g0 /* IEU1 */
/* No possible zero byte: loop.  The annulled delay slot loads the
   next word of the second string only when the branch is taken.  */
76 be,a,pt %xcc, 2b /* CTI */
77 ldxa [%o1 + %o0] ASI_PNF, %o3 /* Load Group */
/* Possible zero byte in equal words.  %o2 has been overwritten, so
   the compared word is rebuilt as %o4 = %g3 + %g1.  If the upper
   32 bits of %g3 have no flagged byte, skip to the lower half at 3.  */
79 addcc %g3, %g1, %o4 /* IEU1 */
80 srlx %g3, 32, %g3 /* IEU0 */
81 andcc %g3, %g2, %g0 /* IEU1 Group */
82 be,pt %xcc, 3f /* CTI */
/* Test the four upper bytes of %o4 one at a time, most significant
   first; a zero byte ends the comparison at 4.  */
84 srlx %o4, 56, %o5 /* IEU0 */
85 andcc %o5, 0xff, %g0 /* IEU1 Group */
86 be,pn %icc, 4f /* CTI */
87 srlx %o4, 48, %o5 /* IEU0 */
89 andcc %o5, 0xff, %g0 /* IEU1 Group */
90 be,pn %icc, 4f /* CTI */
91 srlx %o4, 40, %o5 /* IEU0 */
92 andcc %o5, 0xff, %g0 /* IEU1 Group */
94 be,pn %icc, 4f /* CTI */
95 srlx %o4, 32, %o5 /* IEU0 */
96 andcc %o5, 0xff, %g0 /* IEU1 Group */
97 be,pn %icc, 4f /* CTI */
/* 3: same test for the four lower bytes of %o4.  */
99 3: srlx %o4, 24, %o5 /* IEU0 */
100 andcc %o5, 0xff, %g0 /* IEU1 Group */
101 be,pn %icc, 4f /* CTI */
102 srlx %o4, 16, %o5 /* IEU0 */
104 andcc %o5, 0xff, %g0 /* IEU1 Group */
105 be,pn %icc, 4f /* CTI */
106 srlx %o4, 8, %o5 /* IEU0 */
107 andcc %o5, 0xff, %g0 /* IEU1 Group */
109 be,pn %icc, 4f /* CTI */
/* Lowest byte: if it is non-zero the alarm was false, so resume the
   loop at 2 (annulled delay slot reloads %o3); otherwise fall into 4.  */
110 andcc %o4, 0xff, %g0 /* IEU1 Group */
111 bne,a,pn %icc, 2b /* CTI */
112 ldxa [%o1 + %o0] ASI_PNF, %o3 /* Load */
/* 4: return to the caller.  Reached when a zero byte was found and
   all bytes up to and including it compared equal.
   NOTE(review): presumably the result is 0 here, but the instruction
   that sets %o0 is not visible in this view - confirm.  */
114 4: retl /* CTI+IEU1 Group */
/* 13: the two words differ.  %o3 is the second-string word and %g3
   is (first-string word - %g1).  %g6 = 0xff is the base for the
   per-byte masks.  With no possible zero byte, go straight to the
   full-word compare at 25; the delay slot rebuilds the first-string
   word as %o4 = %g3 + %g1.  */
118 13: mov 0xff, %g6 /* IEU0 Group */
119 #ifdef EIGHTBIT_NOT_RARE
120 andcc %g4, %g2, %g0 /* IEU1 */
122 andcc %g3, %g2, %g0 /* IEU1 */
124 be,pt %xcc, 25f /* CTI */
125 addcc %g3, %g1, %o4 /* IEU1 Group */
/* If the upper 32 bits of %g3 have no flagged byte, skip to 23.  */
127 srlx %g3, 32, %g3 /* IEU0 */
128 andcc %g3, %g2, %g0 /* IEU1 Group */
129 be,pt %xcc, 23f /* CTI */
130 sllx %g6, 56, %o5 /* IEU0 */
/* Look for a zero byte in the four upper bytes of %o4, with
   %o5 = 0xff << shift as the mask; the first zero byte goes to 24.  */
132 andcc %o4, %o5, %g0 /* IEU1 Group */
133 be,pn %xcc, 24f /* CTI */
134 sllx %g6, 48, %o5 /* IEU0 */
135 andcc %o4, %o5, %g0 /* IEU1 Group */
137 be,pn %xcc, 24f /* CTI */
138 sllx %g6, 40, %o5 /* IEU0 */
139 andcc %o4, %o5, %g0 /* IEU1 Group */
140 be,pn %xcc, 24f /* CTI */
142 sllx %g6, 32, %o5 /* IEU0 */
143 andcc %o4, %o5, %g0 /* IEU1 Group */
144 be,pn %xcc, 24f /* CTI */
/* 23: same search in bytes 3..1 of %o4.  The lowest byte needs no
   test: nothing follows it inside the word, so the full-word compare
   at 25 is used when no earlier zero byte was found.  */
145 23: sllx %g6, 24, %o5 /* IEU0 */
147 andcc %o4, %o5, %g0 /* IEU1 Group */
148 be,pn %icc, 24f /* CTI */
149 sllx %g6, 16, %o5 /* IEU0 */
150 andcc %o4, %o5, %g0 /* IEU1 Group */
152 be,pn %icc, 24f /* CTI */
153 sllx %g6, 8, %o5 /* IEU0 */
154 andcc %o4, %o5, %g0 /* IEU1 Group */
155 be,pn %icc, 24f /* CTI */
157 mov %g6, %o5 /* IEU0 */
/* 25: unsigned 64-bit compare of the two words.  %o0 = -1, replaced
   by 1 in the retl delay slot when %o4 > %o3.  */
158 25: cmp %o4, %o3 /* IEU1 Group */
159 5: mov -1, %o0 /* IEU0 */
160 retl /* CTI+IEU1 Group */
162 movgu %xcc, 1, %o0 /* Single Group */
/* 24: %o5 masks the first zero byte of %o4.  Extend the mask over
   every lower-order bit (%o5 | (%o5 - 1)), clear those bits in both
   words, and compare what remains: 1 if greater, -1 if less.
   NOTE(review): %o0 is only written on the greater/less paths here;
   the equal case depends on a value set elsewhere - confirm.  */
165 24: sub %o5, 1, %g6 /* IEU0 Group */
167 or %o5, %g6, %o5 /* IEU0 Group */
168 andn %o4, %o5, %o4 /* IEU0 Group */
170 andn %o3, %o5, %o3 /* IEU1 */
171 cmp %o4, %o3 /* IEU1 Group */
172 movgu %xcc, 1, %o0 /* Single Group */
173 retl /* CTI+IEU1 Group */
175 movlu %xcc, -1, %o0 /* Single Group */
/* 6: bytewise mismatch; return %o4 = first byte - second byte.  */
176 6: retl /* CTI+IEU1 Group */
177 mov %o4, %o0 /* IEU0 */
/* 7: first string not 8-byte aligned.  Load one byte from each
   string, advance %o0, and prepare %g2 = %g1 << 32 for later.  */
180 7: ldub [%o0], %o2 /* Load */
181 add %o0, 1, %o0 /* IEU1 */
182 ldub [%o1], %o3 /* Load Group */
183 sllx %g1, 32, %g2 /* IEU0 */
/* 8: bytewise loop.  Differing bytes return through 6; a zero byte
   (bytes equal and %o3 == 0) returns through 4.  The delay slots
   already fetch the next byte of each string.  */
185 8: add %o1, 1, %o1 /* IEU1 */
186 subcc %o2, %o3, %o4 /* IEU1 Group */
187 bne,pn %xcc, 6b /* CTI */
188 lduba [%o0] ASI_PNF, %o2 /* Load */
190 brz,pn %o3, 4b /* CTI+IEU1 Group */
191 lduba [%o1] ASI_PNF, %o3 /* Load */
/* Keep looping while %o0 is not 8-byte aligned; the annulled delay
   slot advances %o0 only when the branch is taken.  */
192 andcc %o0, 7, %g0 /* IEU1 Group */
193 bne,a,pn %icc, 8b /* CTI */
195 add %o0, 1, %o0 /* IEU0 */
/* %o0 is now aligned.  Finish %g1 and test the alignment of %o1: if
   it is aligned as well, join the word loop at 1 (the annulled delay
   slot loads the first-string word), otherwise fall into 9.  */
196 or %g1, %g2, %g1 /* IEU0 Group */
197 andcc %o1, 7, %g3 /* IEU1 */
198 be,a,pn %icc, 1b /* CTI */
200 ldxa [%o0] ASI_PNF, %o2 /* Load Group */
/* 9: %o0 aligned, %o1 misaligned by %g3 bytes.  %g5 = 8 * %g3 and
   %o5 = 64 - %g5 are the shift counts.  %o1 is rounded down to an
   8-byte boundary, the word found there is loaded into %g6, and %o1
   becomes (aligned %o1 - %o0 + 8) so that [%o1 + %o0] addresses the
   following aligned word of the second string.  */
201 9: sllx %g3, 3, %g5 /* IEU0 */
202 mov 64, %o5 /* IEU1 */
203 sub %o1, %g3, %o1 /* IEU0 Group */
205 sub %o5, %g5, %o5 /* IEU1 */
206 ldxa [%o1] ASI_PNF, %g6 /* Load Group */
207 or %g1, %g2, %g1 /* IEU0 */
208 sub %o1, %o0, %o1 /* IEU1 */
210 sllx %g1, 7, %g2 /* IEU0 Group */
211 add %o1, 8, %o1 /* IEU1 */
212 /* %g1 = 0101010101010101
213 * %g2 = 8080808080808080
214 * %g5 = number of bits to shift left
215 * %o5 = number of bits to shift right */
/* 10/11: build the second-string word in %o3 from the previous
   aligned word shifted left and the next aligned word shifted right,
   load the first-string word into %o2, advance %o0 and compare.  */
216 10: sllx %g6, %g5, %o3 /* IEU0 Group */
217 ldxa [%o1 + %o0] ASI_PNF, %g6 /* Load */
219 11: srlx %g6, %o5, %o4 /* IEU0 Group */
220 ldxa [%o0] ASI_PNF, %o2 /* Load */
221 or %o3, %o4, %o3 /* IEU1 */
222 add %o0, 8, %o0 /* IEU0 Group */
224 subcc %o2, %o3, %g0 /* IEU1 */
/* Words that differ go to 13.  With no possible zero byte, loop at
   10.  Otherwise the upper 32 bits of the candidate bits are tested.
   The first group below is the precise (%g4) variant, the second the
   plain (%g3) variant.  */
225 #ifdef EIGHTBIT_NOT_RARE
226 sub %o2, %g1, %g3 /* IEU0 Group */
227 bne,pn %xcc, 13b /* CTI */
228 andn %g3, %o2, %g4 /* IEU0 Group */
230 andcc %g4, %g2, %g0 /* IEU1 Group */
231 be,pt %xcc, 10b /* CTI */
232 srlx %g4, 32, %g4 /* IEU0 */
233 andcc %g4, %g2, %g0 /* IEU1 Group */
235 bne,pn %xcc, 13b /* CTI */
236 sub %o2, %g1, %g3 /* IEU0 Group */
237 andcc %g3, %g2, %g0 /* IEU1 Group */
239 be,pt %xcc, 10b /* CTI */
240 srlx %g3, 32, %g3 /* IEU0 */
241 andcc %g3, %g2, %g0 /* IEU1 Group */
/* No flagged byte in the upper half: skip to the lower half at 12.  */
243 be,pt %xcc, 12f /* CTI */
/* Equal words with a possible zero byte: test the four upper bytes
   of %o2, most significant first; a zero byte returns through 4.  */
245 srlx %o2, 56, %g3 /* IEU0 */
246 andcc %g3, 0xff, %g0 /* IEU1 Group */
247 be,pn %icc, 4b /* CTI */
248 srlx %o2, 48, %g3 /* IEU0 */
250 andcc %g3, 0xff, %g0 /* IEU1 Group */
251 be,pn %icc, 4b /* CTI */
252 srlx %o2, 40, %g3 /* IEU0 */
253 andcc %g3, 0xff, %g0 /* IEU1 Group */
255 be,pn %icc, 4b /* CTI */
256 srlx %o2, 32, %g3 /* IEU0 */
257 andcc %g3, 0xff, %g0 /* IEU1 Group */
258 be,pn %icc, 4b /* CTI */
/* 12: same test for the four lower bytes of %o2.  */
260 12: srlx %o2, 24, %g3 /* IEU0 */
261 andcc %g3, 0xff, %g0 /* IEU1 Group */
262 be,pn %icc, 4b /* CTI */
263 srlx %o2, 16, %g3 /* IEU0 */
265 andcc %g3, 0xff, %g0 /* IEU1 Group */
266 be,pn %icc, 4b /* CTI */
267 srlx %o2, 8, %g3 /* IEU0 */
268 andcc %g3, 0xff, %g0 /* IEU1 Group */
270 be,pn %icc, 4b /* CTI */
271 andcc %o2, 0xff, %g0 /* IEU1 Group */
272 be,pn %icc, 4b /* CTI */
/* False alarm: the two instructions below repeat the work of label
   10 in the delay slots, then execution re-enters the loop at 11.  */
273 sllx %g6, %g5, %o3 /* IEU0 */
275 ba,pt %xcc, 11b /* CTI Group */
276 ldxa [%o1 + %o0] ASI_PNF, %g6 /* Load */
278 libc_hidden_def(strcmp)