exp2l: Work around a NetBSD 10.0/i386 bug.
[gnulib.git] / lib / memcmp.c
blob8bd2bbd277ca8b4e85a01803ccd8f4d7bf293586
1 /* Copyright (C) 1991, 1993, 1995, 1997-1998, 2003, 2006, 2009-2024 Free
2 Software Foundation, Inc.
4 Contributed by Torbjorn Granlund (tege@sics.se).
6 NOTE: The canonical source of this file is maintained with the GNU C Library.
7 Bugs can be reported to bug-glibc@prep.ai.mit.edu.
9 This file is free software: you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as
11 published by the Free Software Foundation; either version 2.1 of the
12 License, or (at your option) any later version.
14 This file is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU Lesser General Public License for more details.
19 You should have received a copy of the GNU Lesser General Public License
20 along with this program. If not, see <https://www.gnu.org/licenses/>. */
22 #ifndef _LIBC
23 # include <config.h>
24 #endif
26 #include <string.h>
28 #include <stdint.h>
30 #ifdef _LIBC
32 # undef memcmp
34 # include <memcopy.h>
35 # include <endian.h>
37 # if __BYTE_ORDER == __BIG_ENDIAN
38 # define WORDS_BIGENDIAN
39 # endif
41 #else /* Not in the GNU C library. */
43 # include <sys/types.h>
45 /* Type to use for aligned memory operations.
46 This should normally be the biggest type supported by a single load
47 and store. Must be an unsigned type. */
48 # define op_t unsigned long int
49 # define OPSIZ (sizeof(op_t))
51 /* Threshold value for when to enter the unrolled loops. */
52 # define OP_T_THRES 16
54 /* Type to use for unaligned operations. */
55 typedef unsigned char byte;
57 # ifndef WORDS_BIGENDIAN
58 # define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
59 # else
60 # define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
61 # endif
63 #endif /* In the GNU C library. */
65 #ifdef WORDS_BIGENDIAN
66 # define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1)
67 #else
68 # define CMP_LT_OR_GT(a, b) memcmp_bytes (a, b)
69 #endif
71 /* BE VERY CAREFUL IF YOU CHANGE THIS CODE! */
73 /* The strategy of this memcmp is:
75 1. Compare bytes until one of the block pointers is aligned.
77 2. Compare using memcmp_common_alignment or
78 memcmp_not_common_alignment, regarding the alignment of the other
79 block after the initial byte operations. The maximum number of
80 full words (of type op_t) are compared in this way.
82 3. Compare the few remaining bytes. */
84 #ifndef WORDS_BIGENDIAN
85 /* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine.
86 A and B are known to be different.
87 This is needed only on little-endian machines. */
89 # ifdef __GNUC__
90 __inline
91 # endif
92 static int
93 memcmp_bytes (op_t a, op_t b)
95 const byte *srcp1 = (const byte *) &a;
96 const byte *srcp2 = (const byte *) &b;
97 op_t a0, b0;
101 a0 = srcp1[0];
102 b0 = srcp2[0];
103 srcp1 += 1;
104 srcp2 += 1;
106 while (a0 == b0);
107 return a0 - b0;
109 #endif
111 /* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN 'op_t'
112 objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for
113 memory operations on 'op_t's. */
114 #ifdef __GNUC__
115 __inline
116 #endif
117 static int
118 memcmp_common_alignment (uintptr_t srcp1, uintptr_t srcp2, size_t len)
120 op_t a0, a1;
121 op_t b0, b1;
123 switch (len % 4)
125 default: /* Avoid warning about uninitialized local variables. */
126 case 2:
127 a0 = ((op_t *) srcp1)[0];
128 b0 = ((op_t *) srcp2)[0];
129 srcp1 -= 2 * OPSIZ;
130 srcp2 -= 2 * OPSIZ;
131 len += 2;
132 goto do1;
133 case 3:
134 a1 = ((op_t *) srcp1)[0];
135 b1 = ((op_t *) srcp2)[0];
136 srcp1 -= OPSIZ;
137 srcp2 -= OPSIZ;
138 len += 1;
139 goto do2;
140 case 0:
141 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
142 return 0;
143 a0 = ((op_t *) srcp1)[0];
144 b0 = ((op_t *) srcp2)[0];
145 goto do3;
146 case 1:
147 a1 = ((op_t *) srcp1)[0];
148 b1 = ((op_t *) srcp2)[0];
149 srcp1 += OPSIZ;
150 srcp2 += OPSIZ;
151 len -= 1;
152 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
153 goto do0;
154 /* Fall through. */
159 a0 = ((op_t *) srcp1)[0];
160 b0 = ((op_t *) srcp2)[0];
161 if (a1 != b1)
162 return CMP_LT_OR_GT (a1, b1);
164 do3:
165 a1 = ((op_t *) srcp1)[1];
166 b1 = ((op_t *) srcp2)[1];
167 if (a0 != b0)
168 return CMP_LT_OR_GT (a0, b0);
170 do2:
171 a0 = ((op_t *) srcp1)[2];
172 b0 = ((op_t *) srcp2)[2];
173 if (a1 != b1)
174 return CMP_LT_OR_GT (a1, b1);
176 do1:
177 a1 = ((op_t *) srcp1)[3];
178 b1 = ((op_t *) srcp2)[3];
179 if (a0 != b0)
180 return CMP_LT_OR_GT (a0, b0);
182 srcp1 += 4 * OPSIZ;
183 srcp2 += 4 * OPSIZ;
184 len -= 4;
186 while (len != 0);
188 /* This is the right position for do0. Please don't move
189 it into the loop. */
190 do0:
191 if (a1 != b1)
192 return CMP_LT_OR_GT (a1, b1);
193 return 0;
196 /* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN
197 'op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory
198 operations on 'op_t', but SRCP1 *should be unaligned*. */
199 #ifdef __GNUC__
200 __inline
201 #endif
202 static int
203 memcmp_not_common_alignment (uintptr_t srcp1, uintptr_t srcp2, size_t len)
205 op_t a0, a1, a2, a3;
206 op_t b0, b1, b2, b3;
207 op_t x;
208 int shl, shr;
210 /* Calculate how to shift a word read at the memory operation
211 aligned srcp1 to make it aligned for comparison. */
213 shl = 8 * (srcp1 % OPSIZ);
214 shr = 8 * OPSIZ - shl;
216 /* Make SRCP1 aligned by rounding it down to the beginning of the 'op_t'
217 it points in the middle of. */
218 srcp1 &= -OPSIZ;
220 switch (len % 4)
222 default: /* Avoid warning about uninitialized local variables. */
223 case 2:
224 a1 = ((op_t *) srcp1)[0];
225 a2 = ((op_t *) srcp1)[1];
226 b2 = ((op_t *) srcp2)[0];
227 srcp1 -= 1 * OPSIZ;
228 srcp2 -= 2 * OPSIZ;
229 len += 2;
230 goto do1;
231 case 3:
232 a0 = ((op_t *) srcp1)[0];
233 a1 = ((op_t *) srcp1)[1];
234 b1 = ((op_t *) srcp2)[0];
235 srcp2 -= 1 * OPSIZ;
236 len += 1;
237 goto do2;
238 case 0:
239 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
240 return 0;
241 a3 = ((op_t *) srcp1)[0];
242 a0 = ((op_t *) srcp1)[1];
243 b0 = ((op_t *) srcp2)[0];
244 srcp1 += 1 * OPSIZ;
245 goto do3;
246 case 1:
247 a2 = ((op_t *) srcp1)[0];
248 a3 = ((op_t *) srcp1)[1];
249 b3 = ((op_t *) srcp2)[0];
250 srcp1 += 2 * OPSIZ;
251 srcp2 += 1 * OPSIZ;
252 len -= 1;
253 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
254 goto do0;
255 /* Fall through. */
260 a0 = ((op_t *) srcp1)[0];
261 b0 = ((op_t *) srcp2)[0];
262 x = MERGE (a2, shl, a3, shr);
263 if (x != b3)
264 return CMP_LT_OR_GT (x, b3);
266 do3:
267 a1 = ((op_t *) srcp1)[1];
268 b1 = ((op_t *) srcp2)[1];
269 x = MERGE (a3, shl, a0, shr);
270 if (x != b0)
271 return CMP_LT_OR_GT (x, b0);
273 do2:
274 a2 = ((op_t *) srcp1)[2];
275 b2 = ((op_t *) srcp2)[2];
276 x = MERGE (a0, shl, a1, shr);
277 if (x != b1)
278 return CMP_LT_OR_GT (x, b1);
280 do1:
281 a3 = ((op_t *) srcp1)[3];
282 b3 = ((op_t *) srcp2)[3];
283 x = MERGE (a1, shl, a2, shr);
284 if (x != b2)
285 return CMP_LT_OR_GT (x, b2);
287 srcp1 += 4 * OPSIZ;
288 srcp2 += 4 * OPSIZ;
289 len -= 4;
291 while (len != 0);
293 /* This is the right position for do0. Please don't move
294 it into the loop. */
295 do0:
296 x = MERGE (a2, shl, a3, shr);
297 if (x != b3)
298 return CMP_LT_OR_GT (x, b3);
299 return 0;
303 rpl_memcmp (const void *s1, const void *s2, size_t len)
305 op_t a0;
306 op_t b0;
307 uintptr_t srcp1 = (uintptr_t) s1;
308 uintptr_t srcp2 = (uintptr_t) s2;
309 op_t res;
311 if (len >= OP_T_THRES)
313 /* There are at least some bytes to compare. No need to test
314 for LEN == 0 in this alignment loop. */
315 while (srcp2 % OPSIZ != 0)
317 a0 = ((byte *) srcp1)[0];
318 b0 = ((byte *) srcp2)[0];
319 srcp1 += 1;
320 srcp2 += 1;
321 res = a0 - b0;
322 if (res != 0)
323 return res;
324 len -= 1;
327 /* SRCP2 is now aligned for memory operations on 'op_t'.
328 SRCP1 alignment determines if we can do a simple,
329 aligned compare or need to shuffle bits. */
331 if (srcp1 % OPSIZ == 0)
332 res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ);
333 else
334 res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ);
335 if (res != 0)
336 return res;
338 /* Number of bytes remaining in the interval [0..OPSIZ-1]. */
339 srcp1 += len & -OPSIZ;
340 srcp2 += len & -OPSIZ;
341 len %= OPSIZ;
344 /* There are just a few bytes to compare. Use byte memory operations. */
345 while (len != 0)
347 a0 = ((byte *) srcp1)[0];
348 b0 = ((byte *) srcp2)[0];
349 srcp1 += 1;
350 srcp2 += 1;
351 res = a0 - b0;
352 if (res != 0)
353 return res;
354 len -= 1;
357 return 0;
360 #ifdef weak_alias
361 # undef bcmp
362 weak_alias (memcmp, bcmp)
363 #endif