Enable SSE2 memset for AMD's upcoming Orochi processor.
[glibc.git] / stdlib / msort.c
blobfc58f0d4178b2e8eee61536d380df2800e012afa
/* An alternative to qsort, with an identical interface.
   This file is part of the GNU C Library.
   Copyright (C) 1992,95-97,99,2000,01,02,04,07 Free Software Foundation, Inc.
   Written by Mike Haertel, September 1988.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
21 #include <alloca.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <memcopy.h>
27 #include <errno.h>
28 #include <atomic.h>
30 struct msort_param
32 size_t s;
33 size_t var;
34 __compar_d_fn_t cmp;
35 void *arg;
36 char *t;
38 static void msort_with_tmp (const struct msort_param *p, void *b, size_t n);
40 static void
41 msort_with_tmp (const struct msort_param *p, void *b, size_t n)
43 char *b1, *b2;
44 size_t n1, n2;
46 if (n <= 1)
47 return;
49 n1 = n / 2;
50 n2 = n - n1;
51 b1 = b;
52 b2 = (char *) b + (n1 * p->s);
54 msort_with_tmp (p, b1, n1);
55 msort_with_tmp (p, b2, n2);
57 char *tmp = p->t;
58 const size_t s = p->s;
59 __compar_d_fn_t cmp = p->cmp;
60 void *arg = p->arg;
61 switch (p->var)
63 case 0:
64 while (n1 > 0 && n2 > 0)
66 if ((*cmp) (b1, b2, arg) <= 0)
68 *(uint32_t *) tmp = *(uint32_t *) b1;
69 b1 += sizeof (uint32_t);
70 --n1;
72 else
74 *(uint32_t *) tmp = *(uint32_t *) b2;
75 b2 += sizeof (uint32_t);
76 --n2;
78 tmp += sizeof (uint32_t);
80 break;
81 case 1:
82 while (n1 > 0 && n2 > 0)
84 if ((*cmp) (b1, b2, arg) <= 0)
86 *(uint64_t *) tmp = *(uint64_t *) b1;
87 b1 += sizeof (uint64_t);
88 --n1;
90 else
92 *(uint64_t *) tmp = *(uint64_t *) b2;
93 b2 += sizeof (uint64_t);
94 --n2;
96 tmp += sizeof (uint64_t);
98 break;
99 case 2:
100 while (n1 > 0 && n2 > 0)
102 unsigned long *tmpl = (unsigned long *) tmp;
103 unsigned long *bl;
105 tmp += s;
106 if ((*cmp) (b1, b2, arg) <= 0)
108 bl = (unsigned long *) b1;
109 b1 += s;
110 --n1;
112 else
114 bl = (unsigned long *) b2;
115 b2 += s;
116 --n2;
118 while (tmpl < (unsigned long *) tmp)
119 *tmpl++ = *bl++;
121 break;
122 case 3:
123 while (n1 > 0 && n2 > 0)
125 if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0)
127 *(void **) tmp = *(void **) b1;
128 b1 += sizeof (void *);
129 --n1;
131 else
133 *(void **) tmp = *(void **) b2;
134 b2 += sizeof (void *);
135 --n2;
137 tmp += sizeof (void *);
139 break;
140 default:
141 while (n1 > 0 && n2 > 0)
143 if ((*cmp) (b1, b2, arg) <= 0)
145 tmp = (char *) __mempcpy (tmp, b1, s);
146 b1 += s;
147 --n1;
149 else
151 tmp = (char *) __mempcpy (tmp, b2, s);
152 b2 += s;
153 --n2;
156 break;
159 if (n1 > 0)
160 memcpy (tmp, b1, n1 * s);
161 memcpy (b, p->t, (n - n2) * s);
165 void
166 qsort_r (void *b, size_t n, size_t s, __compar_d_fn_t cmp, void *arg)
168 size_t size = n * s;
169 char *tmp = NULL;
170 struct msort_param p;
172 /* For large object sizes use indirect sorting. */
173 if (s > 32)
174 size = 2 * n * sizeof (void *) + s;
176 if (size < 1024)
177 /* The temporary array is small, so put it on the stack. */
178 p.t = __alloca (size);
179 else
181 /* We should avoid allocating too much memory since this might
182 have to be backed up by swap space. */
183 static long int phys_pages;
184 static int pagesize;
186 if (pagesize == 0)
188 phys_pages = __sysconf (_SC_PHYS_PAGES);
190 if (phys_pages == -1)
191 /* Error while determining the memory size. So let's
192 assume there is enough memory. Otherwise the
193 implementer should provide a complete implementation of
194 the `sysconf' function. */
195 phys_pages = (long int) (~0ul >> 1);
197 /* The following determines that we will never use more than
198 a quarter of the physical memory. */
199 phys_pages /= 4;
201 /* Make sure phys_pages is written to memory. */
202 atomic_write_barrier ();
204 pagesize = __sysconf (_SC_PAGESIZE);
207 /* Just a comment here. We cannot compute
208 phys_pages * pagesize
209 and compare the needed amount of memory against this value.
210 The problem is that some systems might have more physical
211 memory then can be represented with a `size_t' value (when
212 measured in bytes. */
214 /* If the memory requirements are too high don't allocate memory. */
215 if (size / pagesize > (size_t) phys_pages)
217 _quicksort (b, n, s, cmp, arg);
218 return;
221 /* It's somewhat large, so malloc it. */
222 int save = errno;
223 tmp = malloc (size);
224 __set_errno (save);
225 if (tmp == NULL)
227 /* Couldn't get space, so use the slower algorithm
228 that doesn't need a temporary array. */
229 _quicksort (b, n, s, cmp, arg);
230 return;
232 p.t = tmp;
235 p.s = s;
236 p.var = 4;
237 p.cmp = cmp;
238 p.arg = arg;
240 if (s > 32)
242 /* Indirect sorting. */
243 char *ip = (char *) b;
244 void **tp = (void **) (p.t + n * sizeof (void *));
245 void **t = tp;
246 void *tmp_storage = (void *) (tp + n);
248 while ((void *) t < tmp_storage)
250 *t++ = ip;
251 ip += s;
253 p.s = sizeof (void *);
254 p.var = 3;
255 msort_with_tmp (&p, p.t + n * sizeof (void *), n);
257 /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
258 the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
259 char *kp;
260 size_t i;
261 for (i = 0, ip = (char *) b; i < n; i++, ip += s)
262 if ((kp = tp[i]) != ip)
264 size_t j = i;
265 char *jp = ip;
266 memcpy (tmp_storage, ip, s);
270 size_t k = (kp - (char *) b) / s;
271 tp[j] = jp;
272 memcpy (jp, kp, s);
273 j = k;
274 jp = kp;
275 kp = tp[k];
277 while (kp != ip);
279 tp[j] = jp;
280 memcpy (jp, tmp_storage, s);
283 else
285 if ((s & (sizeof (uint32_t) - 1)) == 0
286 && ((char *) b - (char *) 0) % __alignof__ (uint32_t) == 0)
288 if (s == sizeof (uint32_t))
289 p.var = 0;
290 else if (s == sizeof (uint64_t)
291 && ((char *) b - (char *) 0) % __alignof__ (uint64_t) == 0)
292 p.var = 1;
293 else if ((s & (sizeof (unsigned long) - 1)) == 0
294 && ((char *) b - (char *) 0)
295 % __alignof__ (unsigned long) == 0)
296 p.var = 2;
298 msort_with_tmp (&p, b, n);
300 free (tmp);
302 libc_hidden_def (qsort_r)
305 void
306 qsort (void *b, size_t n, size_t s, __compar_fn_t cmp)
308 return qsort_r (b, n, s, (__compar_d_fn_t) cmp, NULL);
310 libc_hidden_def (qsort)