stdlib: Reinstate stable mergesort implementation on qsort
[glibc.git] / stdlib / qsort.c
blobb29882388e26cc6f67597418875f98b0f8bc6826
1 /* Copyright (C) 1991-2024 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
18 /* If you consider tuning this algorithm, you should consult first:
19 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
20 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
22 #include <errno.h>
23 #include <limits.h>
24 #include <memswap.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <stdbool.h>
29 /* Swap SIZE bytes between addresses A and B. These helpers are provided
30 along the generic one as an optimization. */
32 enum swap_type_t
34 SWAP_WORDS_64,
35 SWAP_WORDS_32,
36 SWAP_VOID_ARG,
37 SWAP_BYTES
40 typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t;
41 typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t;
43 /* If this function returns true, elements can be safely copied using word
44 loads and stores. Otherwise, it might not be safe. BASE (as an integer)
45 must be a multiple of the word alignment. SIZE must be a multiple of
46 WORDSIZE. Since WORDSIZE must be a multiple of the word alignment, and
47 WORDSIZE is a power of two on all supported platforms, this function for
48 speed merely checks that BASE and SIZE are both multiples of the word
49 size. */
50 static inline bool
51 is_aligned (const void *base, size_t size, size_t wordsize)
53 return (((uintptr_t) base | size) & (wordsize - 1)) == 0;
56 static inline void
57 swap_words_64 (void * restrict a, void * restrict b, size_t n)
61 n -= 8;
62 u64_alias_t t = *(u64_alias_t *)(a + n);
63 *(u64_alias_t *)(a + n) = *(u64_alias_t *)(b + n);
64 *(u64_alias_t *)(b + n) = t;
65 } while (n);
68 static inline void
69 swap_words_32 (void * restrict a, void * restrict b, size_t n)
73 n -= 4;
74 u32_alias_t t = *(u32_alias_t *)(a + n);
75 *(u32_alias_t *)(a + n) = *(u32_alias_t *)(b + n);
76 *(u32_alias_t *)(b + n) = t;
77 } while (n);
80 /* Replace the indirect call with a serie of if statements. It should help
81 the branch predictor. */
82 static void
83 do_swap (void * restrict a, void * restrict b, size_t size,
84 enum swap_type_t swap_type)
86 if (swap_type == SWAP_WORDS_64)
87 swap_words_64 (a, b, size);
88 else if (swap_type == SWAP_WORDS_32)
89 swap_words_32 (a, b, size);
90 else
91 __memswap (a, b, size);
94 /* Establish the heap condition at index K, that is, the key at K will
95 not be less than either of its children, at 2 * K + 1 and 2 * K + 2
96 (if they exist). N is the last valid index. */
97 static inline void
98 siftdown (void *base, size_t size, size_t k, size_t n,
99 enum swap_type_t swap_type, __compar_d_fn_t cmp, void *arg)
101 /* There can only be a heap condition violation if there are
102 children. */
103 while (2 * k + 1 <= n)
105 /* Left child. */
106 size_t j = 2 * k + 1;
107 /* If the right child is larger, use it. */
108 if (j < n && cmp (base + (j * size), base + ((j + 1) * size), arg) < 0)
109 j++;
111 /* If k is already >= to its children, we are done. */
112 if (j == k || cmp (base + (k * size), base + (j * size), arg) >= 0)
113 break;
115 /* Heal the violation. */
116 do_swap (base + (size * j), base + (k * size), size, swap_type);
118 /* Swapping with j may have introduced a violation at j. Fix
119 it in the next loop iteration. */
120 k = j;
124 /* Establish the heap condition for the indices 0 to N (inclusive). */
125 static inline void
126 heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type,
127 __compar_d_fn_t cmp, void *arg)
129 /* If n is odd, k = n / 2 has a left child at n, so this is the
130 largest index that can have a heap condition violation regarding
131 its children. */
132 size_t k = n / 2;
133 while (1)
135 siftdown (base, size, k, n, swap_type, cmp, arg);
136 if (k-- == 0)
137 break;
141 static enum swap_type_t
142 get_swap_type (void *const pbase, size_t size)
144 if ((size & (sizeof (uint32_t) - 1)) == 0
145 && ((uintptr_t) pbase) % __alignof__ (uint32_t) == 0)
147 if (size == sizeof (uint32_t))
148 return SWAP_WORDS_32;
149 else if (size == sizeof (uint64_t)
150 && ((uintptr_t) pbase) % __alignof__ (uint64_t) == 0)
151 return SWAP_WORDS_64;
153 return SWAP_BYTES;
157 /* A non-recursive heapsort with worst-case performance of O(nlog n) and
158 worst-case space complexity of O(1). It sorts the array starting at
159 BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback
160 function used to swap elements, and CMP is the function used to compare
161 elements. */
162 static void
163 heapsort_r (void *base, size_t n, size_t size, __compar_d_fn_t cmp, void *arg)
165 if (n <= 1)
166 return;
168 enum swap_type_t swap_type = get_swap_type (base, size);
170 /* Build the binary heap, largest value at the base[0]. */
171 heapify (base, size, n, swap_type, cmp, arg);
173 while (true)
175 /* Indices 0 .. n contain the binary heap. Extract the largest
176 element put it into the final position in the array. */
177 do_swap (base, base + (n * size), size, swap_type);
179 /* The heap is now one element shorter. */
180 n--;
181 if (n == 0)
182 break;
184 /* By swapping in elements 0 and the previous value of n (now at
185 n + 1), we likely introduced a heap condition violation. Fix
186 it for the reduced heap. */
187 siftdown (base, size, 0, n, swap_type, cmp, arg);
191 /* The maximum size in bytes required by mergesort that will be provided
192 through a buffer allocated in the stack. */
193 #define QSORT_STACK_SIZE 1024
195 /* Elements larger than this value will be sorted through indirect sorting
196 to minimize the need to memory swap calls. */
197 #define INDIRECT_SORT_SIZE_THRES 32
199 struct msort_param
201 size_t s;
202 enum swap_type_t var;
203 __compar_d_fn_t cmp;
204 void *arg;
205 char *t;
208 static void
209 msort_with_tmp (const struct msort_param *p, void *b, size_t n)
211 char *b1, *b2;
212 size_t n1, n2;
214 if (n <= 1)
215 return;
217 n1 = n / 2;
218 n2 = n - n1;
219 b1 = b;
220 b2 = (char *) b + (n1 * p->s);
222 msort_with_tmp (p, b1, n1);
223 msort_with_tmp (p, b2, n2);
225 char *tmp = p->t;
226 const size_t s = p->s;
227 __compar_d_fn_t cmp = p->cmp;
228 void *arg = p->arg;
229 switch (p->var)
231 case SWAP_WORDS_32:
232 while (n1 > 0 && n2 > 0)
234 if (cmp (b1, b2, arg) <= 0)
236 *(u32_alias_t *) tmp = *(u32_alias_t *) b1;
237 b1 += sizeof (u32_alias_t);
238 --n1;
240 else
242 *(u32_alias_t *) tmp = *(u32_alias_t *) b2;
243 b2 += sizeof (u32_alias_t);
244 --n2;
246 tmp += sizeof (u32_alias_t);
248 break;
249 case SWAP_WORDS_64:
250 while (n1 > 0 && n2 > 0)
252 if (cmp (b1, b2, arg) <= 0)
254 *(u64_alias_t *) tmp = *(u64_alias_t *) b1;
255 b1 += sizeof (u64_alias_t);
256 --n1;
258 else
260 *(u64_alias_t *) tmp = *(u64_alias_t *) b2;
261 b2 += sizeof (u64_alias_t);
262 --n2;
264 tmp += sizeof (u64_alias_t);
266 break;
267 case SWAP_VOID_ARG:
268 while (n1 > 0 && n2 > 0)
270 if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0)
272 *(void **) tmp = *(void **) b1;
273 b1 += sizeof (void *);
274 --n1;
276 else
278 *(void **) tmp = *(void **) b2;
279 b2 += sizeof (void *);
280 --n2;
282 tmp += sizeof (void *);
284 break;
285 default:
286 while (n1 > 0 && n2 > 0)
288 if (cmp (b1, b2, arg) <= 0)
290 tmp = (char *) __mempcpy (tmp, b1, s);
291 b1 += s;
292 --n1;
294 else
296 tmp = (char *) __mempcpy (tmp, b2, s);
297 b2 += s;
298 --n2;
301 break;
304 if (n1 > 0)
305 memcpy (tmp, b1, n1 * s);
306 memcpy (b, p->t, (n - n2) * s);
309 static void
310 __attribute_used__
311 indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n,
312 size_t s)
314 /* Indirect sorting. */
315 char *ip = (char *) b;
316 void **tp = (void **) (p->t + n * sizeof (void *));
317 void **t = tp;
318 void *tmp_storage = (void *) (tp + n);
320 while ((void *) t < tmp_storage)
322 *t++ = ip;
323 ip += s;
325 msort_with_tmp (p, p->t + n * sizeof (void *), n);
327 /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
328 the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
329 char *kp;
330 size_t i;
331 for (i = 0, ip = (char *) b; i < n; i++, ip += s)
332 if ((kp = tp[i]) != ip)
334 size_t j = i;
335 char *jp = ip;
336 memcpy (tmp_storage, ip, s);
340 size_t k = (kp - (char *) b) / s;
341 tp[j] = jp;
342 memcpy (jp, kp, s);
343 j = k;
344 jp = kp;
345 kp = tp[k];
347 while (kp != ip);
349 tp[j] = jp;
350 memcpy (jp, tmp_storage, s);
354 void
355 __qsort_r (void *const pbase, size_t total_elems, size_t size,
356 __compar_d_fn_t cmp, void *arg)
358 if (total_elems <= 1)
359 return;
361 /* Align to the maximum size used by the swap optimization. */
362 _Alignas (uint64_t) char tmp[QSORT_STACK_SIZE];
363 size_t total_size = total_elems * size;
364 char *buf;
366 if (size > INDIRECT_SORT_SIZE_THRES)
367 total_size = 2 * total_elems * sizeof (void *) + size;
369 if (total_size < sizeof buf)
370 buf = tmp;
371 else
373 int save = errno;
374 buf = malloc (total_size);
375 __set_errno (save);
376 if (buf == NULL)
378 /* Fallback to heapsort in case of memory failure. */
379 heapsort_r (pbase, total_elems - 1, size, cmp, arg);
380 return;
384 if (size > INDIRECT_SORT_SIZE_THRES)
386 const struct msort_param msort_param =
388 .s = sizeof (void *),
389 .cmp = cmp,
390 .arg = arg,
391 .var = SWAP_VOID_ARG,
392 .t = buf,
394 indirect_msort_with_tmp (&msort_param, pbase, total_elems, size);
396 else
398 const struct msort_param msort_param =
400 .s = size,
401 .cmp = cmp,
402 .arg = arg,
403 .var = get_swap_type (pbase, size),
404 .t = buf,
406 msort_with_tmp (&msort_param, pbase, total_elems);
409 if (buf != tmp)
410 free (buf);
412 libc_hidden_def (__qsort_r)
413 weak_alias (__qsort_r, qsort_r)
415 void
416 qsort (void *b, size_t n, size_t s, __compar_fn_t cmp)
418 return __qsort_r (b, n, s, (__compar_d_fn_t) cmp, NULL);
420 libc_hidden_def (qsort)