1 /* Copyright (C) 1991-2024 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
18 /* If you consider tuning this algorithm, you should consult first:
19 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
20 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
/* Swap SIZE bytes between addresses A and B.  These helpers are provided
   along the generic one as an optimization.  */

/* How a pair of elements may be swapped/copied, selected once per sort
   by get_swap_type from the element size and the base alignment.
   NOTE(review): this declaration was missing from the extracted text but
   is referenced by every routine below; restored.  */
enum swap_type_t
  {
    SWAP_WORDS_64,
    SWAP_WORDS_32,
    SWAP_VOID_ARG,
    SWAP_BYTES
  };

/* may_alias-qualified word types so that copying elements one 32-bit or
   64-bit word at a time does not violate strict-aliasing rules.  */
typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t;
typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t;
44 swap_words_64 (void * restrict a
, void * restrict b
, size_t n
)
49 u64_alias_t t
= *(u64_alias_t
*)(a
+ n
);
50 *(u64_alias_t
*)(a
+ n
) = *(u64_alias_t
*)(b
+ n
);
51 *(u64_alias_t
*)(b
+ n
) = t
;
56 swap_words_32 (void * restrict a
, void * restrict b
, size_t n
)
61 u32_alias_t t
= *(u32_alias_t
*)(a
+ n
);
62 *(u32_alias_t
*)(a
+ n
) = *(u32_alias_t
*)(b
+ n
);
63 *(u32_alias_t
*)(b
+ n
) = t
;
67 /* Replace the indirect call with a serie of if statements. It should help
68 the branch predictor. */
70 do_swap (void * restrict a
, void * restrict b
, size_t size
,
71 enum swap_type_t swap_type
)
73 if (swap_type
== SWAP_WORDS_64
)
74 swap_words_64 (a
, b
, size
);
75 else if (swap_type
== SWAP_WORDS_32
)
76 swap_words_32 (a
, b
, size
);
78 __memswap (a
, b
, size
);
81 /* Establish the heap condition at index K, that is, the key at K will
82 not be less than either of its children, at 2 * K + 1 and 2 * K + 2
83 (if they exist). N is the last valid index. */
85 siftdown (void *base
, size_t size
, size_t k
, size_t n
,
86 enum swap_type_t swap_type
, __compar_d_fn_t cmp
, void *arg
)
88 /* There can only be a heap condition violation if there are
90 while (2 * k
+ 1 <= n
)
94 /* If the right child is larger, use it. */
95 if (j
< n
&& cmp (base
+ (j
* size
), base
+ ((j
+ 1) * size
), arg
) < 0)
98 /* If k is already >= to its children, we are done. */
99 if (j
== k
|| cmp (base
+ (k
* size
), base
+ (j
* size
), arg
) >= 0)
102 /* Heal the violation. */
103 do_swap (base
+ (size
* j
), base
+ (k
* size
), size
, swap_type
);
105 /* Swapping with j may have introduced a violation at j. Fix
106 it in the next loop iteration. */
111 /* Establish the heap condition for the indices 0 to N (inclusive). */
113 heapify (void *base
, size_t size
, size_t n
, enum swap_type_t swap_type
,
114 __compar_d_fn_t cmp
, void *arg
)
116 /* If n is odd, k = n / 2 has a left child at n, so this is the
117 largest index that can have a heap condition violation regarding
122 siftdown (base
, size
, k
, n
, swap_type
, cmp
, arg
);
128 static enum swap_type_t
129 get_swap_type (void *const pbase
, size_t size
)
131 if ((size
& (sizeof (uint32_t) - 1)) == 0
132 && ((uintptr_t) pbase
) % __alignof__ (uint32_t) == 0)
134 if (size
== sizeof (uint32_t))
135 return SWAP_WORDS_32
;
136 else if (size
== sizeof (uint64_t)
137 && ((uintptr_t) pbase
) % __alignof__ (uint64_t) == 0)
138 return SWAP_WORDS_64
;
144 /* A non-recursive heapsort with worst-case performance of O(nlog n) and
145 worst-case space complexity of O(1). It sorts the array starting at
146 BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback
147 function used to swap elements, and CMP is the function used to compare
150 heapsort_r (void *base
, size_t n
, size_t size
, __compar_d_fn_t cmp
, void *arg
)
155 enum swap_type_t swap_type
= get_swap_type (base
, size
);
157 /* Build the binary heap, largest value at the base[0]. */
158 heapify (base
, size
, n
, swap_type
, cmp
, arg
);
162 /* Indices 0 .. n contain the binary heap. Extract the largest
163 element put it into the final position in the array. */
164 do_swap (base
, base
+ (n
* size
), size
, swap_type
);
166 /* The heap is now one element shorter. */
171 /* By swapping in elements 0 and the previous value of n (now at
172 n + 1), we likely introduced a heap condition violation. Fix
173 it for the reduced heap. */
174 siftdown (base
, size
, 0, n
, swap_type
, cmp
, arg
);
/* The maximum size in bytes required by mergesort that will be provided
   through a buffer allocated on the stack.  */
#define QSORT_STACK_SIZE 1024

/* Elements larger than this value are sorted indirectly (through an
   array of pointers) to minimize the number of memory swap calls.  */
#define INDIRECT_SORT_SIZE_THRES 32
189 enum swap_type_t var
;
196 msort_with_tmp (const struct msort_param
*p
, void *b
, size_t n
)
207 b2
= (char *) b
+ (n1
* p
->s
);
209 msort_with_tmp (p
, b1
, n1
);
210 msort_with_tmp (p
, b2
, n2
);
213 const size_t s
= p
->s
;
214 __compar_d_fn_t cmp
= p
->cmp
;
219 while (n1
> 0 && n2
> 0)
221 if (cmp (b1
, b2
, arg
) <= 0)
223 *(u32_alias_t
*) tmp
= *(u32_alias_t
*) b1
;
224 b1
+= sizeof (u32_alias_t
);
229 *(u32_alias_t
*) tmp
= *(u32_alias_t
*) b2
;
230 b2
+= sizeof (u32_alias_t
);
233 tmp
+= sizeof (u32_alias_t
);
237 while (n1
> 0 && n2
> 0)
239 if (cmp (b1
, b2
, arg
) <= 0)
241 *(u64_alias_t
*) tmp
= *(u64_alias_t
*) b1
;
242 b1
+= sizeof (u64_alias_t
);
247 *(u64_alias_t
*) tmp
= *(u64_alias_t
*) b2
;
248 b2
+= sizeof (u64_alias_t
);
251 tmp
+= sizeof (u64_alias_t
);
255 while (n1
> 0 && n2
> 0)
257 if ((*cmp
) (*(const void **) b1
, *(const void **) b2
, arg
) <= 0)
259 *(void **) tmp
= *(void **) b1
;
260 b1
+= sizeof (void *);
265 *(void **) tmp
= *(void **) b2
;
266 b2
+= sizeof (void *);
269 tmp
+= sizeof (void *);
273 while (n1
> 0 && n2
> 0)
275 if (cmp (b1
, b2
, arg
) <= 0)
277 tmp
= (char *) __mempcpy (tmp
, b1
, s
);
283 tmp
= (char *) __mempcpy (tmp
, b2
, s
);
292 memcpy (tmp
, b1
, n1
* s
);
293 memcpy (b
, p
->t
, (n
- n2
) * s
);
298 indirect_msort_with_tmp (const struct msort_param
*p
, void *b
, size_t n
,
301 /* Indirect sorting. */
302 char *ip
= (char *) b
;
303 void **tp
= (void **) (p
->t
+ n
* sizeof (void *));
305 void *tmp_storage
= (void *) (tp
+ n
);
307 while ((void *) t
< tmp_storage
)
312 msort_with_tmp (p
, p
->t
+ n
* sizeof (void *), n
);
314 /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
315 the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
318 for (i
= 0, ip
= (char *) b
; i
< n
; i
++, ip
+= s
)
319 if ((kp
= tp
[i
]) != ip
)
323 memcpy (tmp_storage
, ip
, s
);
327 size_t k
= (kp
- (char *) b
) / s
;
337 memcpy (jp
, tmp_storage
, s
);
342 __qsort_r (void *const pbase
, size_t total_elems
, size_t size
,
343 __compar_d_fn_t cmp
, void *arg
)
345 if (total_elems
<= 1)
348 /* Align to the maximum size used by the swap optimization. */
349 _Alignas (uint64_t) char tmp
[QSORT_STACK_SIZE
];
350 size_t total_size
= total_elems
* size
;
353 if (size
> INDIRECT_SORT_SIZE_THRES
)
354 total_size
= 2 * total_elems
* sizeof (void *) + size
;
356 if (total_size
<= sizeof tmp
)
361 buf
= malloc (total_size
);
365 /* Fallback to heapsort in case of memory failure. */
366 heapsort_r (pbase
, total_elems
- 1, size
, cmp
, arg
);
371 if (size
> INDIRECT_SORT_SIZE_THRES
)
373 const struct msort_param msort_param
=
375 .s
= sizeof (void *),
378 .var
= SWAP_VOID_ARG
,
381 indirect_msort_with_tmp (&msort_param
, pbase
, total_elems
, size
);
385 const struct msort_param msort_param
=
390 .var
= get_swap_type (pbase
, size
),
393 msort_with_tmp (&msort_param
, pbase
, total_elems
);
399 libc_hidden_def (__qsort_r
)
400 weak_alias (__qsort_r
, qsort_r
)
403 qsort (void *b
, size_t n
, size_t s
, __compar_fn_t cmp
)
405 return __qsort_r (b
, n
, s
, (__compar_d_fn_t
) cmp
, NULL
);
407 libc_hidden_def (qsort
)