1 /* Platform-independent deterministic sort function.
2 Copyright (C) 2018-2024 Free Software Foundation, Inc.
3 Contributed by Alexander Monakov.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This implements a sort function suitable for GCC use cases:
22 - signature-compatible to C qsort, but relaxed contract:
23 - may apply the comparator to elements in a temporary buffer
24 - may abort on allocation failure
25 - deterministic (but not necessarily stable)
26 - fast, especially for common cases (0-5 elements of size 8 or 4)
28 The implementation uses sorting networks for up to 5 elements and
29 a merge sort on top of that. Neither stage has branches depending on
30 comparator result, trading extra arithmetic for branch mispredictions. */
41 #define noinline __attribute__ ((__noinline__))
46 /* C-style qsort comparator function type. */
47 typedef int cmp_fn (const void *, const void *);
49 /* Structure holding read-mostly (read-only in netsort) context. */
52 cmp_fn
*cmp
; // pointer to comparator
53 char *out
; // output buffer
54 size_t n
; // number of elements
55 size_t size
; // element size
56 size_t nlim
; // limit for using sorting networks
59 /* Like sort_ctx, but for use with qsort_r-style comparators. Several
60 functions in this file are templates that work with either context type. */
69 int cmp (const void *a
, const void *b
)
71 return cmp_ (a
, b
, data
);
75 /* Helper for netsort. Permute, possibly in-place, 2 or 3 elements,
76 placing E0 to C->OUT, E1 to C->OUT + C->SIZE, and so on. */
77 template<typename sort_ctx
>
79 reorder23 (sort_ctx
*c
, char *e0
, char *e1
, char *e2
)
81 #define REORDER_23(TYPE, STRIDE, OFFSET) \
84 memcpy (&t0, e0 + OFFSET, sizeof (TYPE)); \
85 memcpy (&t1, e1 + OFFSET, sizeof (TYPE)); \
86 char *out = c->out + OFFSET; \
87 if (LIKELY (c->n == 3)) \
88 memmove (out + 2*STRIDE, e2 + OFFSET, sizeof (TYPE));\
89 memcpy (out, &t0, sizeof (TYPE)); out += STRIDE; \
90 memcpy (out, &t1, sizeof (TYPE)); \
93 if (LIKELY (c
->size
== sizeof (size_t)))
94 REORDER_23 (size_t, sizeof (size_t), 0);
95 else if (LIKELY (c
->size
== sizeof (int)))
96 REORDER_23 (int, sizeof (int), 0);
99 size_t offset
= 0, step
= sizeof (size_t);
100 for (; offset
+ step
<= c
->size
; offset
+= step
)
101 REORDER_23 (size_t, c
->size
, offset
);
102 for (; offset
< c
->size
; offset
++)
103 REORDER_23 (char, c
->size
, offset
);
107 /* Like reorder23, but permute 4 or 5 elements. */
108 template<typename sort_ctx
>
110 reorder45 (sort_ctx
*c
, char *e0
, char *e1
, char *e2
, char *e3
, char *e4
)
112 #define REORDER_45(TYPE, STRIDE, OFFSET) \
114 TYPE t0, t1, t2, t3; \
115 memcpy (&t0, e0 + OFFSET, sizeof (TYPE)); \
116 memcpy (&t1, e1 + OFFSET, sizeof (TYPE)); \
117 memcpy (&t2, e2 + OFFSET, sizeof (TYPE)); \
118 memcpy (&t3, e3 + OFFSET, sizeof (TYPE)); \
119 char *out = c->out + OFFSET; \
120 if (LIKELY (c->n == 5)) \
121 memmove (out + 4*STRIDE, e4 + OFFSET, sizeof (TYPE));\
122 memcpy (out, &t0, sizeof (TYPE)); out += STRIDE; \
123 memcpy (out, &t1, sizeof (TYPE)); out += STRIDE; \
124 memcpy (out, &t2, sizeof (TYPE)); out += STRIDE; \
125 memcpy (out, &t3, sizeof (TYPE)); \
128 if (LIKELY (c
->size
== sizeof (size_t)))
129 REORDER_45 (size_t, sizeof (size_t), 0);
130 else if (LIKELY (c
->size
== sizeof (int)))
131 REORDER_45 (int, sizeof (int), 0);
134 size_t offset
= 0, step
= sizeof (size_t);
135 for (; offset
+ step
<= c
->size
; offset
+= step
)
136 REORDER_45 (size_t, c
->size
, offset
);
137 for (; offset
< c
->size
; offset
++)
138 REORDER_45 (char, c
->size
, offset
);
142 /* Helper for netsort. Invoke comparator CMP on E0 and E1.
143 Return E0^E1 if E0 compares less than E1, zero otherwise.
144 This is noinline to avoid code growth and confine invocation
145 to a single call site, assisting indirect branch prediction. */
146 template<typename sort_ctx
>
147 noinline
static intptr_t
148 cmp1 (char *e0
, char *e1
, sort_ctx
*c
)
150 intptr_t x
= (intptr_t)e0
^ (intptr_t)e1
;
151 return x
& (c
->cmp (e0
, e1
) >> 31);
154 /* Apply a sorting network to 2 to 5 elements from IN, placing them into C->OUT.
155 IN may be equal to C->OUT, in which case elements are sorted in place. */
156 template<typename sort_ctx
>
158 netsort (char *in
, sort_ctx
*c
)
160 #define CMP(e0, e1) \
162 intptr_t x = cmp1 (e1, e0, c); \
163 e0 = (char *)((intptr_t)e0 ^ x); \
164 e1 = (char *)((intptr_t)e1 ^ x); \
167 char *e0
= in
, *e1
= e0
+ c
->size
, *e2
= e1
+ c
->size
;
169 if (LIKELY (c
->n
== 3))
175 return reorder23 (c
, e0
, e1
, e2
);
176 char *e3
= e2
+ c
->size
, *e4
= e3
+ c
->size
;
177 if (LIKELY (c
->n
== 5))
183 if (LIKELY (c
->n
== 5))
191 reorder45 (c
, e0
, e1
, e2
, e3
, e4
);
194 /* Execute merge sort on N elements from IN, placing them into OUT,
195 using TMP as temporary storage if IN is equal to OUT.
196 This is a stable sort if netsort is used only for 2 or 3 elements. */
197 template<typename sort_ctx
>
199 mergesort (char *in
, sort_ctx
*c
, size_t n
, char *out
, char *tmp
)
201 if (LIKELY (n
<= c
->nlim
))
205 return netsort (in
, c
);
207 size_t nl
= n
/ 2, nr
= n
- nl
, sz
= nl
* c
->size
;
208 char *mid
= in
+ sz
, *r
= out
+ sz
, *l
= in
== out
? tmp
: in
;
209 /* Sort the right half, outputting to right half of OUT. */
210 mergesort (mid
, c
, nr
, r
, tmp
);
211 /* Sort the left half, leaving left half of OUT free. */
212 mergesort (in
, c
, nl
, l
, mid
);
213 /* Merge sorted halves given by L, R to [OUT, END). */
214 #define MERGE_ELTSIZE(SIZE) \
216 intptr_t mr = c->cmp (r, l) >> 31; \
217 intptr_t lr = (intptr_t)l ^ (intptr_t)r; \
218 lr = (intptr_t)l ^ (lr & mr); \
219 out = (char *)memcpy (out, (char *)lr, SIZE); \
222 if (r == out) return; \
226 if (LIKELY (c
->cmp (r
, l
+ (r
- out
) - c
->size
) < 0))
228 char *end
= out
+ n
* c
->size
;
229 if (sizeof (size_t) == 8 && LIKELY (c
->size
== 8))
231 else if (LIKELY (c
->size
== 4))
234 MERGE_ELTSIZE (c
->size
);
236 memcpy (out
, l
, r
- out
);
240 /* Don't complain about cast from void* to function pointer. */
241 #pragma GCC diagnostic push
242 #pragma GCC diagnostic ignored "-Wconditionally-supported"
244 /* Adapter for using two-argument comparators in functions expecting the
245 three-argument sort_r_cmp_fn type. */
247 cmp2to3 (const void *a
, const void *b
, void *c
)
249 return ((cmp_fn
*)c
) (a
, b
);
253 /* Replacement for C qsort. */
255 gcc_qsort (void *vbase
, size_t n
, size_t size
, cmp_fn
*cmp
)
260 bool stable
= (ssize_t
) size
< 0;
262 nlim
= 3, size
= ~size
;
263 char *base
= (char *)vbase
;
264 sort_ctx c
= {cmp
, base
, n
, size
, nlim
};
265 long long scratch
[32];
266 size_t bufsz
= (n
/ 2) * size
;
267 void *buf
= bufsz
<= sizeof scratch
? scratch
: xmalloc (bufsz
);
268 mergesort (base
, &c
, n
, base
, (char *)buf
);
272 qsort_chk (vbase
, n
, size
, cmp2to3
, (void*)cmp
);
273 #pragma GCC diagnostic pop
277 /* Substitute for Glibc qsort_r. */
279 gcc_sort_r (void *vbase
, size_t n
, size_t size
, sort_r_cmp_fn
*cmp
, void *data
)
284 bool stable
= (ssize_t
) size
< 0;
286 nlim
= 3, size
= ~size
;
287 char *base
= (char *)vbase
;
288 sort_r_ctx c
= {data
, cmp
, base
, n
, size
, nlim
};
289 long long scratch
[32];
290 size_t bufsz
= (n
/ 2) * size
;
291 void *buf
= bufsz
<= sizeof scratch
? scratch
: xmalloc (bufsz
);
292 mergesort (base
, &c
, n
, base
, (char *)buf
);
296 qsort_chk (vbase
, n
, size
, cmp
, data
);
300 /* Stable sort, signature-compatible to C qsort. */
302 gcc_stablesort (void *vbase
, size_t n
, size_t size
, cmp_fn
*cmp
)
304 gcc_qsort (vbase
, n
, ~size
, cmp
);
307 /* Stable sort, signature-compatible to Glibc qsort_r. */
309 gcc_stablesort_r (void *vbase
, size_t n
, size_t size
, sort_r_cmp_fn
*cmp
,
312 gcc_sort_r (vbase
, n
, ~size
, cmp
, data
);