/* Copyright (C) 1991-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
/* If you consider tuning this algorithm, you should consult first:
   Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
   Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993.  */
/* Swap SIZE bytes between addresses A and B.  These helpers are provided
   along the generic one as an optimization.  */

/* Word-sized access types carrying the may_alias attribute, so that the
   word loads/stores in the swap and merge helpers below are permitted to
   touch objects of any element type without violating strict aliasing.  */
typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t;
typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t;
/* If this function returns true, elements can be safely copied using word
   loads and stores.  Otherwise, it might not be safe.  BASE (as an integer)
   must be a multiple of the word alignment.  SIZE must be a multiple of
   WORDSIZE.  Since WORDSIZE must be a multiple of the word alignment, and
   WORDSIZE is a power of two on all supported platforms, this function for
   speed merely checks that BASE and SIZE are both multiples of the word
   size.  */
static inline bool
is_aligned (const void *base, size_t size, size_t wordsize)
{
  /* OR-ing the address and the size lets one AND mask test both
     multiples at once; WORDSIZE is a power of two, so WORDSIZE - 1
     is the modulo mask.  */
  return (((uintptr_t) base | size) & (wordsize - 1)) == 0;
}
57 swap_words_64 (void * restrict a
, void * restrict b
, size_t n
)
62 u64_alias_t t
= *(u64_alias_t
*)(a
+ n
);
63 *(u64_alias_t
*)(a
+ n
) = *(u64_alias_t
*)(b
+ n
);
64 *(u64_alias_t
*)(b
+ n
) = t
;
69 swap_words_32 (void * restrict a
, void * restrict b
, size_t n
)
74 u32_alias_t t
= *(u32_alias_t
*)(a
+ n
);
75 *(u32_alias_t
*)(a
+ n
) = *(u32_alias_t
*)(b
+ n
);
76 *(u32_alias_t
*)(b
+ n
) = t
;
80 /* Replace the indirect call with a serie of if statements. It should help
81 the branch predictor. */
83 do_swap (void * restrict a
, void * restrict b
, size_t size
,
84 enum swap_type_t swap_type
)
86 if (swap_type
== SWAP_WORDS_64
)
87 swap_words_64 (a
, b
, size
);
88 else if (swap_type
== SWAP_WORDS_32
)
89 swap_words_32 (a
, b
, size
);
91 __memswap (a
, b
, size
);
94 /* Establish the heap condition at index K, that is, the key at K will
95 not be less than either of its children, at 2 * K + 1 and 2 * K + 2
96 (if they exist). N is the last valid index. */
98 siftdown (void *base
, size_t size
, size_t k
, size_t n
,
99 enum swap_type_t swap_type
, __compar_d_fn_t cmp
, void *arg
)
101 /* There can only be a heap condition violation if there are
103 while (2 * k
+ 1 <= n
)
106 size_t j
= 2 * k
+ 1;
107 /* If the right child is larger, use it. */
108 if (j
< n
&& cmp (base
+ (j
* size
), base
+ ((j
+ 1) * size
), arg
) < 0)
111 /* If k is already >= to its children, we are done. */
112 if (j
== k
|| cmp (base
+ (k
* size
), base
+ (j
* size
), arg
) >= 0)
115 /* Heal the violation. */
116 do_swap (base
+ (size
* j
), base
+ (k
* size
), size
, swap_type
);
118 /* Swapping with j may have introduced a violation at j. Fix
119 it in the next loop iteration. */
124 /* Establish the heap condition for the indices 0 to N (inclusive). */
126 heapify (void *base
, size_t size
, size_t n
, enum swap_type_t swap_type
,
127 __compar_d_fn_t cmp
, void *arg
)
129 /* If n is odd, k = n / 2 has a left child at n, so this is the
130 largest index that can have a heap condition violation regarding
135 siftdown (base
, size
, k
, n
, swap_type
, cmp
, arg
);
141 static enum swap_type_t
142 get_swap_type (void *const pbase
, size_t size
)
144 if ((size
& (sizeof (uint32_t) - 1)) == 0
145 && ((uintptr_t) pbase
) % __alignof__ (uint32_t) == 0)
147 if (size
== sizeof (uint32_t))
148 return SWAP_WORDS_32
;
149 else if (size
== sizeof (uint64_t)
150 && ((uintptr_t) pbase
) % __alignof__ (uint64_t) == 0)
151 return SWAP_WORDS_64
;
157 /* A non-recursive heapsort with worst-case performance of O(nlog n) and
158 worst-case space complexity of O(1). It sorts the array starting at
159 BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback
160 function used to swap elements, and CMP is the function used to compare
163 heapsort_r (void *base
, size_t n
, size_t size
, __compar_d_fn_t cmp
, void *arg
)
168 enum swap_type_t swap_type
= get_swap_type (base
, size
);
170 /* Build the binary heap, largest value at the base[0]. */
171 heapify (base
, size
, n
, swap_type
, cmp
, arg
);
175 /* Indices 0 .. n contain the binary heap. Extract the largest
176 element put it into the final position in the array. */
177 do_swap (base
, base
+ (n
* size
), size
, swap_type
);
179 /* The heap is now one element shorter. */
184 /* By swapping in elements 0 and the previous value of n (now at
185 n + 1), we likely introduced a heap condition violation. Fix
186 it for the reduced heap. */
187 siftdown (base
, size
, 0, n
, swap_type
, cmp
, arg
);
/* The maximum size in bytes required by mergesort that will be provided
   through a buffer allocated in the stack.  */
#define QSORT_STACK_SIZE 1024

/* Elements larger than this value will be sorted through indirect sorting
   to minimize the need to memory swap calls.  */
#define INDIRECT_SORT_SIZE_THRES 32
202 enum swap_type_t var
;
209 msort_with_tmp (const struct msort_param
*p
, void *b
, size_t n
)
220 b2
= (char *) b
+ (n1
* p
->s
);
222 msort_with_tmp (p
, b1
, n1
);
223 msort_with_tmp (p
, b2
, n2
);
226 const size_t s
= p
->s
;
227 __compar_d_fn_t cmp
= p
->cmp
;
232 while (n1
> 0 && n2
> 0)
234 if (cmp (b1
, b2
, arg
) <= 0)
236 *(u32_alias_t
*) tmp
= *(u32_alias_t
*) b1
;
237 b1
+= sizeof (u32_alias_t
);
242 *(u32_alias_t
*) tmp
= *(u32_alias_t
*) b2
;
243 b2
+= sizeof (u32_alias_t
);
246 tmp
+= sizeof (u32_alias_t
);
250 while (n1
> 0 && n2
> 0)
252 if (cmp (b1
, b2
, arg
) <= 0)
254 *(u64_alias_t
*) tmp
= *(u64_alias_t
*) b1
;
255 b1
+= sizeof (u64_alias_t
);
260 *(u64_alias_t
*) tmp
= *(u64_alias_t
*) b2
;
261 b2
+= sizeof (u64_alias_t
);
264 tmp
+= sizeof (u64_alias_t
);
268 while (n1
> 0 && n2
> 0)
270 if ((*cmp
) (*(const void **) b1
, *(const void **) b2
, arg
) <= 0)
272 *(void **) tmp
= *(void **) b1
;
273 b1
+= sizeof (void *);
278 *(void **) tmp
= *(void **) b2
;
279 b2
+= sizeof (void *);
282 tmp
+= sizeof (void *);
286 while (n1
> 0 && n2
> 0)
288 if (cmp (b1
, b2
, arg
) <= 0)
290 tmp
= (char *) __mempcpy (tmp
, b1
, s
);
296 tmp
= (char *) __mempcpy (tmp
, b2
, s
);
305 memcpy (tmp
, b1
, n1
* s
);
306 memcpy (b
, p
->t
, (n
- n2
) * s
);
311 indirect_msort_with_tmp (const struct msort_param
*p
, void *b
, size_t n
,
314 /* Indirect sorting. */
315 char *ip
= (char *) b
;
316 void **tp
= (void **) (p
->t
+ n
* sizeof (void *));
318 void *tmp_storage
= (void *) (tp
+ n
);
320 while ((void *) t
< tmp_storage
)
325 msort_with_tmp (p
, p
->t
+ n
* sizeof (void *), n
);
327 /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
328 the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
331 for (i
= 0, ip
= (char *) b
; i
< n
; i
++, ip
+= s
)
332 if ((kp
= tp
[i
]) != ip
)
336 memcpy (tmp_storage
, ip
, s
);
340 size_t k
= (kp
- (char *) b
) / s
;
350 memcpy (jp
, tmp_storage
, s
);
355 __qsort_r (void *const pbase
, size_t total_elems
, size_t size
,
356 __compar_d_fn_t cmp
, void *arg
)
358 if (total_elems
<= 1)
361 /* Align to the maximum size used by the swap optimization. */
362 _Alignas (uint64_t) char tmp
[QSORT_STACK_SIZE
];
363 size_t total_size
= total_elems
* size
;
366 if (size
> INDIRECT_SORT_SIZE_THRES
)
367 total_size
= 2 * total_elems
* sizeof (void *) + size
;
369 if (total_size
< sizeof buf
)
374 buf
= malloc (total_size
);
378 /* Fallback to heapsort in case of memory failure. */
379 heapsort_r (pbase
, total_elems
- 1, size
, cmp
, arg
);
384 if (size
> INDIRECT_SORT_SIZE_THRES
)
386 const struct msort_param msort_param
=
388 .s
= sizeof (void *),
391 .var
= SWAP_VOID_ARG
,
394 indirect_msort_with_tmp (&msort_param
, pbase
, total_elems
, size
);
398 const struct msort_param msort_param
=
403 .var
= get_swap_type (pbase
, size
),
406 msort_with_tmp (&msort_param
, pbase
, total_elems
);
412 libc_hidden_def (__qsort_r
)
413 weak_alias (__qsort_r
, qsort_r
)
416 qsort (void *b
, size_t n
, size_t s
, __compar_fn_t cmp
)
418 return __qsort_r (b
, n
, s
, (__compar_d_fn_t
) cmp
, NULL
);
420 libc_hidden_def (qsort
)