16449 Want smbios additional information (Type 40) decoding
[illumos-gate.git] / usr / src / common / util / qsort.c
blob78967c0ac633ca6e81c9d9f6e2f43f757990702a
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2020 Oxide Computer Company
28 #if !defined(_KERNEL) && !defined(_KMDB)
29 #include "lint.h"
30 #endif /* !_KERNEL && !_KMDB */
32 #include <sys/types.h>
34 #if !defined(_KERNEL) && !defined(_KMDB)
35 #include <stdlib.h>
36 #include <synch.h>
37 #endif /* !_KERNEL && !_KMDB */
39 #include "qsort.h"
41 static void swapp32(uint32_t *r1, uint32_t *r2, size_t cnt);
42 static void swapp64(uint64_t *r1, uint64_t *r2, size_t cnt);
43 static void swapi(uint32_t *r1, uint32_t *r2, size_t cnt);
44 static void swapb(char *r1, char *r2, size_t cnt);
46 typedef int (*cmp_f)(const void *, const void *, void *);
49 * choose a median of 3 values
51 static __GNU_INLINE char *
52 med3(char *a, char *b, char *c, cmp_f cmp, void *arg)
54 if (cmp(a, b, arg) < 0) {
55 if (cmp(b, c, arg) < 0) {
56 return (b);
57 } else if (cmp(a, c, arg) < 0) {
58 return (c);
59 } else {
60 return (a);
62 } else {
63 if (cmp(b, c, arg) > 0) {
64 return (b);
65 } else if (cmp(a, c, arg) > 0) {
66 return (c);
67 } else {
68 return (a);
73 #define THRESH_L 5 /* threshold for insertion sort */
74 #define THRESH_M3 20 /* threshold for median of 3 */
75 #define THRESH_M9 50 /* threshold for median of 9 */
77 typedef struct {
78 char *b_lim;
79 size_t nrec;
80 } stk_t;
83 * qsort() is a general purpose, in-place sorting routine using a
84 * user provided call back function for comparisons. This implementation
85 * utilizes a ternary quicksort algorithm, and cuts over to an
86 * insertion sort for partitions involving fewer than THRESH_L records.
88 * Potential User Errors
89 * There is no return value from qsort, this function has no method
90 * of alerting the user that a sort did not work or could not work.
91 * We do not print an error message or exit the process or thread,
92 * Even if we can detect an error, We CANNOT silently return without
93 * sorting the data, if we did so the user could never be sure the
94 * sort completed successfully.
95 * It is possible we could change the return value of sort from void
96 * to int and return success or some error codes, but this gets into
97 * standards and compatibility issues.
99 * Examples of qsort parameter errors might be
100 * 1) record size (rsiz) equal to 0
101 * qsort will loop and never return.
102 * 2) record size (rsiz) less than 0
103 * rsiz is unsigned, so a negative value is insanely large
104 * 3) number of records (nrec) is 0
105 * This is legal - qsort will return without examining any records
106 * 4) number of records (nrec) is less than 0
107 * nrec is unsigned, so a negative value is insanely large.
108 * 5) nrec * rsiz > memory allocation for sort array
109 * a segment violation may occur
110 * corruption of other memory may occur
111 * 6) The base address of the sort array is invalid
112 * a segment violation may occur
113 * corruption of other memory may occur
114 * 7) The user call back function is invalid
115 * we may get alignment errors or segment violations
116 * we may jump into never-never land
118 * Some less obvious errors might be
119 * 8) The user compare function is not comparing correctly
120 * 9) The user compare function modifies the data records
123 static int
124 qsort_r_wrapper(const void *a, const void *b, void *arg)
126 int (*cmp)(const void *, const void *) =
127 (int(*)(const void *, const void *))(uintptr_t)arg;
128 return (cmp(a, b));
131 void
132 qsort_r(void *basep, size_t nrec, size_t rsiz,
133 cmp_f cmp, void *arg)
135 size_t i; /* temporary variable */
137 /* variables used by swap */
138 void (*swapf)(char *, char *, size_t);
139 size_t loops;
141 /* variables used by sort */
142 stk_t stack[8 * sizeof (nrec) + 1];
143 stk_t *sp;
144 char *b_lim; /* bottom limit */
145 char *b_dup; /* bottom duplicate */
146 char *b_par; /* bottom partition */
147 char *t_lim; /* top limit */
148 char *t_dup; /* top duplicate */
149 char *t_par; /* top partition */
150 char *m1, *m2, *m3; /* median pointers */
151 uintptr_t d_bytelength; /* byte length of duplicate records */
152 int b_nrec;
153 int t_nrec;
154 int cv; /* results of compare (bottom / top) */
157 * choose a swap function based on alignment and size
159 * The qsort function sorts an array of fixed length records.
160 * We have very limited knowledge about the data record itself.
161 * It may be that the data record is in the array we are sorting
162 * or it may be that the array contains pointers or indexes to
163 * the actual data record and all that we are sorting is the indexes.
165 * The following decision will choose an optimal swap function
166 * based on the size and alignment of the data records
167 * swapp64 will swap 64 bit pointers
168 * swapp32 will swap 32 bit pointers
169 * swapi will swap an array of 32 bit integers
170 * swapb will swap an array of 8 bit characters
172 * swapi and swapb will also require the variable loops to be set
173 * to control the length of the array being swapped
175 if ((((uintptr_t)basep & (sizeof (uint64_t) - 1)) == 0) &&
176 (rsiz == sizeof (uint64_t))) {
177 loops = 1;
178 swapf = (void (*)(char *, char *, size_t))swapp64;
179 } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
180 (rsiz == sizeof (uint32_t))) {
181 loops = 1;
182 swapf = (void (*)(char *, char *, size_t))swapp32;
183 } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
184 ((rsiz & (sizeof (uint32_t) - 1)) == 0)) {
185 loops = rsiz / sizeof (int);
186 swapf = (void (*)(char *, char *, size_t))swapi;
187 } else {
188 loops = rsiz;
189 swapf = swapb;
193 * qsort is a partitioning sort
195 * the stack is the bookkeeping mechanism to keep track of all
196 * the partitions.
198 * each sort pass takes one partition and sorts it into two partitions.
199 * at the top of the loop we simply take the partition on the top
200 * of the stack and sort it. See the comments at the bottom
201 * of the loop regarding which partitions to add in what order.
203 * initially put the whole partition on the stack
205 sp = stack;
206 sp->b_lim = (char *)basep;
207 sp->nrec = nrec;
208 sp++;
209 while (sp > stack) {
210 sp--;
211 b_lim = sp->b_lim;
212 nrec = sp->nrec;
215 * a linear insertion sort i faster than a qsort for
216 * very small number of records (THRESH_L)
218 * if number records < threshold use linear insertion sort
220 * this also handles the special case where the partition
221 * 0 or 1 records length.
223 if (nrec < THRESH_L) {
225 * Linear insertion sort
227 t_par = b_lim;
228 for (i = 1; i < nrec; i++) {
229 t_par += rsiz;
230 b_par = t_par;
231 while (b_par > b_lim) {
232 b_par -= rsiz;
233 if ((*cmp)(b_par, b_par + rsiz, arg) <=
234 0) {
235 break;
237 (*swapf)(b_par, b_par + rsiz, loops);
242 * a linear insertion sort will put all records
243 * in their final position and will not create
244 * subpartitions.
246 * therefore when the insertion sort is complete
247 * just go to the top of the loop and get the
248 * next partition to sort.
250 continue;
253 /* quicksort */
256 * choose a pivot record
258 * Ideally the pivot record will divide the partition
259 * into two equal parts. however we have to balance the
260 * work involved in selecting the pivot record with the
261 * expected benefit.
263 * The choice of pivot record depends on the number of
264 * records in the partition
266 * for small partitions (nrec < THRESH_M3)
267 * we just select the record in the middle of the partition
269 * if (nrec >= THRESH_M3 && nrec < THRESH_M9)
270 * we select three values and choose the median of 3
272 * if (nrec >= THRESH_M9)
273 * then we use an approximate median of 9
274 * 9 records are selected and grouped in 3 groups of 3
275 * the median of each of these 3 groups is fed into another
276 * median of 3 decision.
278 * Each median of 3 decision is 2 or 3 compares,
279 * so median of 9 costs between 8 and 12 compares.
281 * i is byte distance between two consecutive samples
282 * m2 will point to the pivot record
284 if (nrec < THRESH_M3) {
285 m2 = b_lim + (nrec / 2) * rsiz;
286 } else if (nrec < THRESH_M9) {
287 /* use median of 3 */
288 i = ((nrec - 1) / 2) * rsiz;
289 m2 = med3(b_lim, b_lim + i, b_lim + 2 * i, cmp, arg);
290 } else {
291 /* approx median of 9 */
292 i = ((nrec - 1) / 8) * rsiz;
293 m1 = med3(b_lim, b_lim + i, b_lim + 2 * i, cmp, arg);
294 m2 = med3(b_lim + 3 * i, b_lim + 4 * i, b_lim + 5 * i,
295 cmp, arg);
296 m3 = med3(b_lim + 6 * i, b_lim + 7 * i, b_lim + 8 * i,
297 cmp, arg);
298 m2 = med3(m1, m2, m3, cmp, arg);
302 * quick sort partitioning
304 * The partition limits are defined by bottom and top pointers
305 * b_lim and t_lim.
307 * qsort uses a fairly standard method of moving the
308 * partitioning pointers, b_par and t_par, to the middle of
309 * the partition and exchanging records that are in the
310 * wrong part of the partition.
312 * Two enhancements have been made to the basic algorithm.
313 * One for handling duplicate records and one to minimize
314 * the number of swaps.
316 * Two duplicate records pointers are (b_dup and t_dup) are
317 * initially set to b_lim and t_lim. Each time a record
318 * whose sort key value is equal to the pivot record is found
319 * it will be swapped with the record pointed to by
320 * b_dup or t_dup and the duplicate pointer will be
321 * incremented toward the center.
322 * When partitioning is complete, all the duplicate records
323 * will have been collected at the upper and lower limits of
324 * the partition and can easily be moved adjacent to the
325 * pivot record.
327 * The second optimization is to minimize the number of swaps.
328 * The pointer m2 points to the pivot record.
329 * During partitioning, if m2 is ever equal to the partitioning
330 * pointers, b_par or t_par, then b_par or t_par just moves
331 * onto the next record without doing a compare.
332 * If as a result of duplicate record detection,
333 * b_dup or t_dup is ever equal to m2,
334 * then m2 is changed to point to the duplicate record and
335 * b_dup or t_dup is incremented with out swapping records.
337 * When partitioning is done, we may not have the same pivot
338 * record that we started with, but we will have one with
339 * an equal sort key.
341 b_dup = b_par = b_lim;
342 t_dup = t_par = t_lim = b_lim + rsiz * (nrec - 1);
343 for (;;) {
345 /* move bottom pointer up */
346 for (; b_par <= t_par; b_par += rsiz) {
347 if (b_par == m2) {
348 continue;
350 cv = cmp(b_par, m2, arg);
351 if (cv > 0) {
352 break;
354 if (cv == 0) {
355 if (b_dup == m2) {
356 m2 = b_par;
357 } else if (b_dup != b_par) {
358 (*swapf)(b_dup, b_par, loops);
360 b_dup += rsiz;
364 /* move top pointer down */
365 for (; b_par < t_par; t_par -= rsiz) {
366 if (t_par == m2) {
367 continue;
369 cv = cmp(t_par, m2, arg);
370 if (cv < 0) {
371 break;
373 if (cv == 0) {
374 if (t_dup == m2) {
375 m2 = t_par;
376 } else if (t_dup != t_par) {
377 (*swapf)(t_dup, t_par, loops);
379 t_dup -= rsiz;
383 /* break if we are done partitioning */
384 if (b_par >= t_par) {
385 break;
388 /* exchange records at upper and lower break points */
389 (*swapf)(b_par, t_par, loops);
390 b_par += rsiz;
391 t_par -= rsiz;
395 * partitioning is now complete
397 * there are two termination conditions from the partitioning
398 * loop above. Either b_par or t_par have crossed or
399 * they are equal.
401 * we need to swap the pivot record to its final position
402 * m2 could be in either the upper or lower partitions
403 * or it could already be in its final position
406 * R[b_par] > R[m2]
407 * R[t_par] < R[m2]
409 if (t_par < b_par) {
410 if (m2 < t_par) {
411 (*swapf)(m2, t_par, loops);
412 m2 = b_par = t_par;
413 } else if (m2 > b_par) {
414 (*swapf)(m2, b_par, loops);
415 m2 = t_par = b_par;
416 } else {
417 b_par = t_par = m2;
419 } else {
420 if (m2 < t_par) {
421 t_par = b_par = t_par - rsiz;
423 if (m2 != b_par) {
424 (*swapf)(m2, b_par, loops);
426 m2 = t_par;
430 * move bottom duplicates next to pivot
431 * optimized to eliminate overlap
433 d_bytelength = b_dup - b_lim;
434 if (b_par - b_dup < d_bytelength) {
435 b_dup = b_lim + (b_par - b_dup);
437 while (b_dup > b_lim) {
438 b_dup -= rsiz;
439 b_par -= rsiz;
440 (*swapf)(b_dup, b_par, loops);
442 b_par = m2 - d_bytelength;
445 * move top duplicates next to pivot
447 d_bytelength = t_lim - t_dup;
448 if (t_dup - t_par < d_bytelength) {
449 t_dup = t_lim - (t_dup - t_par);
451 while (t_dup < t_lim) {
452 t_dup += rsiz;
453 t_par += rsiz;
454 (*swapf)(t_dup, t_par, loops);
456 t_par = m2 + d_bytelength;
459 * when a qsort pass completes there are three partitions
460 * 1) the lower contains all records less than pivot
461 * 2) the upper contains all records greater than pivot
462 * 3) the pivot partition contains all record equal to pivot
464 * all records in the pivot partition are in their final
465 * position and do not need to be accounted for by the stack
467 * when adding partitions to the stack
468 * it is important to add the largest partition first
469 * to prevent stack overflow.
471 * calculate number of unsorted records in top and bottom
472 * push resulting partitions on stack
474 b_nrec = (b_par - b_lim) / rsiz;
475 t_nrec = (t_lim - t_par) / rsiz;
476 if (b_nrec < t_nrec) {
477 sp->b_lim = t_par + rsiz;
478 sp->nrec = t_nrec;
479 sp++;
480 sp->b_lim = b_lim;
481 sp->nrec = b_nrec;
482 sp++;
483 } else {
484 sp->b_lim = b_lim;
485 sp->nrec = b_nrec;
486 sp++;
487 sp->b_lim = t_par + rsiz;
488 sp->nrec = t_nrec;
489 sp++;
494 void
495 qsort(void *basep, size_t nrec, size_t rsiz,
496 int (*cmp)(const void *, const void *))
498 return (qsort_r(basep, nrec, rsiz, qsort_r_wrapper,
499 (void *)(uintptr_t)cmp));
503 * The following swap functions should not create a stack frame
504 * the SPARC call / return instruction will be executed
505 * but the a save / restore will not be executed
506 * which means we won't do a window turn with the spill / fill overhead
507 * verify this by examining the assembly code
510 /* ARGSUSED */
511 static void
512 swapp32(uint32_t *r1, uint32_t *r2, size_t cnt)
514 uint32_t temp;
516 temp = *r1;
517 *r1++ = *r2;
518 *r2++ = temp;
521 /* ARGSUSED */
522 static void
523 swapp64(uint64_t *r1, uint64_t *r2, size_t cnt)
525 uint64_t temp;
527 temp = *r1;
528 *r1++ = *r2;
529 *r2++ = temp;
532 static void
533 swapi(uint32_t *r1, uint32_t *r2, size_t cnt)
535 uint32_t temp;
537 /* character by character */
538 while (cnt--) {
539 temp = *r1;
540 *r1++ = *r2;
541 *r2++ = temp;
545 static void
546 swapb(char *r1, char *r2, size_t cnt)
548 char temp;
550 /* character by character */
551 while (cnt--) {
552 temp = *r1;
553 *r1++ = *r2;
554 *r2++ = temp;