Update.
[glibc.git] / sysdeps / generic / memcmp.c
blobd9f93bab61ad4fc7c8bb459fc9c4948f162fcec5
1 /* Copyright (C) 1991, 1993, 1995 Free Software Foundation, Inc.
2 Contributed by Torbjorn Granlund (tege@sics.se).
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If
16 not, write to the Free Software Foundation, Inc., 675 Mass Ave,
17 Cambridge, MA 02139, USA. */
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
23 #undef __ptr_t
24 #if defined (__cplusplus) || (defined (__STDC__) && __STDC__)
25 #define __ptr_t void *
26 #else /* Not C++ or ANSI C. */
27 #undef const
28 #define const
29 #define __ptr_t char *
30 #endif /* C++ or ANSI C. */
32 #if defined (HAVE_STRING_H) || defined (_LIBC)
33 #include <string.h>
34 #endif
36 #ifdef _LIBC
38 #include <memcopy.h>
40 #else /* Not in the GNU C library. */
42 #include <sys/types.h>
44 /* Type to use for aligned memory operations.
45 This should normally be the biggest type supported by a single load
46 and store. Must be an unsigned type. */
47 #define op_t unsigned long int
48 #define OPSIZ (sizeof(op_t))
50 /* Threshold value for when to enter the unrolled loops. */
51 #define OP_T_THRES 16
53 /* Type to use for unaligned operations. */
54 typedef unsigned char byte;
56 #ifndef WORDS_BIGENDIAN
57 #define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
58 #else
59 #define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
60 #endif
62 #endif /* In the GNU C library. */
64 #ifdef WORDS_BIGENDIAN
65 #define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1)
66 #else
67 #define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b))
68 #endif
70 /* BE VERY CAREFUL IF YOU CHANGE THIS CODE! */
72 /* The strategy of this memcmp is:
74 1. Compare bytes until one of the block pointers is aligned.
76 2. Compare using memcmp_common_alignment or
77 memcmp_not_common_alignment, regarding the alignment of the other
78 block after the initial byte operations. The maximum number of
79 full words (of type op_t) are compared in this way.
81 3. Compare the few remaining bytes. */
83 #ifndef WORDS_BIGENDIAN
84 /* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine.
85 A and B are known to be different.
86 This is needed only on little-endian machines. */
88 static int memcmp_bytes __P((op_t, op_t));
90 #ifdef __GNUC__
91 __inline
92 #endif
93 static int
94 memcmp_bytes (a, b)
95 op_t a, b;
97 long int srcp1 = (long int) &a;
98 long int srcp2 = (long int) &b;
99 op_t a0, b0;
103 a0 = ((byte *) srcp1)[0];
104 b0 = ((byte *) srcp2)[0];
105 srcp1 += 1;
106 srcp2 += 1;
108 while (a0 == b0);
109 return a0 - b0;
111 #endif
113 static int memcmp_common_alignment __P((long, long, size_t));
115 /* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t'
116 objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for
117 memory operations on `op_t's. */
118 #ifdef __GNUC__
119 __inline
120 #endif
121 static int
122 memcmp_common_alignment (srcp1, srcp2, len)
123 long int srcp1;
124 long int srcp2;
125 size_t len;
127 op_t a0, a1;
128 op_t b0, b1;
130 switch (len % 4)
132 case 2:
133 a0 = ((op_t *) srcp1)[0];
134 b0 = ((op_t *) srcp2)[0];
135 srcp1 -= 2 * OPSIZ;
136 srcp2 -= 2 * OPSIZ;
137 len += 2;
138 goto do1;
139 case 3:
140 a1 = ((op_t *) srcp1)[0];
141 b1 = ((op_t *) srcp2)[0];
142 srcp1 -= OPSIZ;
143 srcp2 -= OPSIZ;
144 len += 1;
145 goto do2;
146 case 0:
147 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
148 return 0;
149 a0 = ((op_t *) srcp1)[0];
150 b0 = ((op_t *) srcp2)[0];
151 goto do3;
152 case 1:
153 a1 = ((op_t *) srcp1)[0];
154 b1 = ((op_t *) srcp2)[0];
155 srcp1 += OPSIZ;
156 srcp2 += OPSIZ;
157 len -= 1;
158 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
159 goto do0;
160 /* Fall through. */
165 a0 = ((op_t *) srcp1)[0];
166 b0 = ((op_t *) srcp2)[0];
167 if (a1 != b1)
168 return CMP_LT_OR_GT (a1, b1);
170 do3:
171 a1 = ((op_t *) srcp1)[1];
172 b1 = ((op_t *) srcp2)[1];
173 if (a0 != b0)
174 return CMP_LT_OR_GT (a0, b0);
176 do2:
177 a0 = ((op_t *) srcp1)[2];
178 b0 = ((op_t *) srcp2)[2];
179 if (a1 != b1)
180 return CMP_LT_OR_GT (a1, b1);
182 do1:
183 a1 = ((op_t *) srcp1)[3];
184 b1 = ((op_t *) srcp2)[3];
185 if (a0 != b0)
186 return CMP_LT_OR_GT (a0, b0);
188 srcp1 += 4 * OPSIZ;
189 srcp2 += 4 * OPSIZ;
190 len -= 4;
192 while (len != 0);
194 /* This is the right position for do0. Please don't move
195 it into the loop. */
196 do0:
197 if (a1 != b1)
198 return CMP_LT_OR_GT (a1, b1);
199 return 0;
202 static int memcmp_not_common_alignment __P((long, long, size_t));
204 /* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN
205 `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory
206 operations on `op_t', but SRCP1 *should be unaligned*. */
207 #ifdef __GNUC__
208 __inline
209 #endif
210 static int
211 memcmp_not_common_alignment (srcp1, srcp2, len)
212 long int srcp1;
213 long int srcp2;
214 size_t len;
216 op_t a0, a1, a2, a3;
217 op_t b0, b1, b2, b3;
218 op_t x;
219 int shl, shr;
221 /* Calculate how to shift a word read at the memory operation
222 aligned srcp1 to make it aligned for comparison. */
224 shl = 8 * (srcp1 % OPSIZ);
225 shr = 8 * OPSIZ - shl;
227 /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t'
228 it points in the middle of. */
229 srcp1 &= -OPSIZ;
231 switch (len % 4)
233 case 2:
234 a1 = ((op_t *) srcp1)[0];
235 a2 = ((op_t *) srcp1)[1];
236 b2 = ((op_t *) srcp2)[0];
237 srcp1 -= 1 * OPSIZ;
238 srcp2 -= 2 * OPSIZ;
239 len += 2;
240 goto do1;
241 case 3:
242 a0 = ((op_t *) srcp1)[0];
243 a1 = ((op_t *) srcp1)[1];
244 b1 = ((op_t *) srcp2)[0];
245 srcp2 -= 1 * OPSIZ;
246 len += 1;
247 goto do2;
248 case 0:
249 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
250 return 0;
251 a3 = ((op_t *) srcp1)[0];
252 a0 = ((op_t *) srcp1)[1];
253 b0 = ((op_t *) srcp2)[0];
254 srcp1 += 1 * OPSIZ;
255 goto do3;
256 case 1:
257 a2 = ((op_t *) srcp1)[0];
258 a3 = ((op_t *) srcp1)[1];
259 b3 = ((op_t *) srcp2)[0];
260 srcp1 += 2 * OPSIZ;
261 srcp2 += 1 * OPSIZ;
262 len -= 1;
263 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
264 goto do0;
265 /* Fall through. */
270 a0 = ((op_t *) srcp1)[0];
271 b0 = ((op_t *) srcp2)[0];
272 x = MERGE(a2, shl, a3, shr);
273 if (x != b3)
274 return CMP_LT_OR_GT (x, b3);
276 do3:
277 a1 = ((op_t *) srcp1)[1];
278 b1 = ((op_t *) srcp2)[1];
279 x = MERGE(a3, shl, a0, shr);
280 if (x != b0)
281 return CMP_LT_OR_GT (x, b0);
283 do2:
284 a2 = ((op_t *) srcp1)[2];
285 b2 = ((op_t *) srcp2)[2];
286 x = MERGE(a0, shl, a1, shr);
287 if (x != b1)
288 return CMP_LT_OR_GT (x, b1);
290 do1:
291 a3 = ((op_t *) srcp1)[3];
292 b3 = ((op_t *) srcp2)[3];
293 x = MERGE(a1, shl, a2, shr);
294 if (x != b2)
295 return CMP_LT_OR_GT (x, b2);
297 srcp1 += 4 * OPSIZ;
298 srcp2 += 4 * OPSIZ;
299 len -= 4;
301 while (len != 0);
303 /* This is the right position for do0. Please don't move
304 it into the loop. */
305 do0:
306 x = MERGE(a2, shl, a3, shr);
307 if (x != b3)
308 return CMP_LT_OR_GT (x, b3);
309 return 0;
313 memcmp (s1, s2, len)
314 const __ptr_t s1;
315 const __ptr_t s2;
316 size_t len;
318 op_t a0;
319 op_t b0;
320 long int srcp1 = (long int) s1;
321 long int srcp2 = (long int) s2;
322 op_t res;
324 if (len >= OP_T_THRES)
326 /* There are at least some bytes to compare. No need to test
327 for LEN == 0 in this alignment loop. */
328 while (srcp2 % OPSIZ != 0)
330 a0 = ((byte *) srcp1)[0];
331 b0 = ((byte *) srcp2)[0];
332 srcp1 += 1;
333 srcp2 += 1;
334 res = a0 - b0;
335 if (res != 0)
336 return res;
337 len -= 1;
340 /* SRCP2 is now aligned for memory operations on `op_t'.
341 SRCP1 alignment determines if we can do a simple,
342 aligned compare or need to shuffle bits. */
344 if (srcp1 % OPSIZ == 0)
345 res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ);
346 else
347 res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ);
348 if (res != 0)
349 return res;
351 /* Number of bytes remaining in the interval [0..OPSIZ-1]. */
352 srcp1 += len & -OPSIZ;
353 srcp2 += len & -OPSIZ;
354 len %= OPSIZ;
357 /* There are just a few bytes to compare. Use byte memory operations. */
358 while (len != 0)
360 a0 = ((byte *) srcp1)[0];
361 b0 = ((byte *) srcp2)[0];
362 srcp1 += 1;
363 srcp2 += 1;
364 res = a0 - b0;
365 if (res != 0)
366 return res;
367 len -= 1;
370 return 0;
373 #ifdef weak_alias
374 #undef bcmp
375 weak_alias (memcmp, bcmp)
376 #endif