2 * The code of this file was taken from http://jeffreystedfast.blogspot.be,
3 * where it was posted in 2011 by Jeffrey Stedfast under the MIT license.
4 * The MIT license text is as follows:
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 #define MID(lo, hi) (lo + ((hi - lo) >> 1))
32 /* The code here is an optimized merge sort. Starting from a generic merge sort
33 * the following optimizations were applied:
35 * o Batching of memcpy() calls: Instead of calling memcpy() to copy each and
36 * every element into a temporary buffer, blocks of elements are copied
39 * o To reduce the number of memcpy() calls further, copying leading
40 * and trailing elements into our temporary buffer is avoided, in case it is
41 * not necessary to merge them.
43 * A further optimization could be to specialize memcpy calls based on the
44 * size of the types we compare. For now, this code does not include the
45 * relevant optimization, as clang e.g. inlines a very efficient memcpy()
46 * implementation. It is not clear, that the specialized version as provided in
47 * the blog post, is really superior to the one that will be inlined by
48 * default. So we decided to keep the code simple until this optimization was
49 * proven to be beneficial.
53 msort (void *array
, void *buf
, size_t low
, size_t high
, size_t size
,
54 int (* compare
) (const void *, const void *, void *), void *arg
)
56 char *a1
, *al
, *am
, *ah
, *ls
, *hs
, *lo
, *hi
, *b
;
60 mid
= MID (low
, high
);
63 msort (array
, buf
, mid
+ 1, high
, size
, compare
, arg
);
66 msort (array
, buf
, low
, mid
, size
, compare
, arg
);
68 ah
= ((char *) array
) + ((high
+ 1) * size
);
69 am
= ((char *) array
) + ((mid
+ 1) * size
);
70 a1
= al
= ((char *) array
) + (low
* size
);
80 if (lo
> al
|| hi
> am
) {
81 /* our last loop already compared lo & hi and found lo <= hi */
85 while (lo
< am
&& compare (lo
, hi
, arg
) <= 0)
90 /* avoid copying the leading items */
95 /* our last compare tells us hi < lo */
98 while (hi
< ah
&& compare (hi
, lo
, arg
) < 0)
102 memcpy (b
, ls
, lo
- ls
);
107 memcpy (b
, hs
, hi
- hs
);
111 memcpy (b
, ls
, lo
- ls
);
115 /* copy everything we needed to re-order back into array */
116 memcpy (a1
, buf
, copied
);
119 /* everything already in order */
125 memcpy (b
, lo
, am
- lo
);
129 memcpy (a1
, buf
, copied
);
133 MergeSort (void *base
, size_t nmemb
, size_t size
,
134 int (* compare
) (const void *, const void *, void *), void *arg
)
141 if (!(tmp
= malloc (nmemb
* size
))) {
146 msort (base
, tmp
, 0, nmemb
- 1, size
, compare
, arg
);
153 int isl_sort(void *const pbase
, size_t total_elems
, size_t size
,
154 int (*cmp
)(const void *, const void *, void *arg
), void *arg
)
156 return MergeSort (pbase
, total_elems
, size
, cmp
, arg
);