doc: clarify ln's --help output
[coreutils/ericb.git] / gl / lib / randperm.c
blob362316e23f79f89cbb84428a80e2e35e474c1e6f
1 /* Generate random permutations.
3 Copyright (C) 2006-2007, 2009-2011 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 /* Written by Paul Eggert. */
20 #include <config.h>
22 #include "hash.h"
23 #include "randperm.h"
25 #include <limits.h>
26 #include <stdlib.h>
28 #include "xalloc.h"
/* Compute ceil (log2 (N)): the number of bits needed to distinguish
   N different values.  For N == 0 the result is unspecified (the
   decrement below wraps around).  */

static size_t _GL_ATTRIBUTE_CONST
ceil_lg (size_t n)
{
  size_t bits = 0;
  size_t rest = n - 1;

  /* Count how many times N - 1 can be halved before reaching zero.  */
  while (rest != 0)
    {
      rest >>= 1;
      bits++;
    }

  return bits;
}
/* Return an upper bound on the number of random bytes needed to
   generate the first H elements of a random permutation of N
   elements.  H must not exceed N.

   If the bound does not fit in a size_t, the largest size_t value is
   returned instead of a wrapped-around (and therefore far too small)
   number.  */

size_t
randperm_bound (size_t h, size_t n)
{
  /* Upper bound on number of bits needed to generate the first number
     of the permutation.  */
  size_t lg_n = ceil_lg (n);

  /* LG_N * H would overflow: saturate, since any wrapped result would
     no longer be an upper bound.  */
  if (h != 0 && lg_n > (size_t) -1 / h)
    return (size_t) -1;

  /* Upper bound on number of bits needed to generate the first H
     elements.  */
  size_t ar = lg_n * h;

  /* Convert the bit count to a byte count, rounding up.  This form
     avoids the overflow that AR + CHAR_BIT - 1 could cause.  */
  size_t bound = ar / CHAR_BIT + (ar % CHAR_BIT != 0);

  return bound;
}
/* Exchange the values stored at positions I and J of array V.
   I and J may be equal, in which case V is left unchanged.  */

static void
swap (size_t *v, size_t i, size_t j)
{
  size_t *first = &v[i];
  size_t *second = &v[j];
  size_t saved = *first;

  *first = *second;
  *second = saved;
}
/* A sparse_map is an abstract data type that stands in for the full
   array V used by swap(): only the slots that have actually been
   touched are stored.  This uses much less memory when the number of
   swaps performed is significantly smaller than the size of the
   input.  */

/* One stored slot of the sparse map.  */
struct sparse_ent_
{
  size_t index;   /* Position in the conceptual array; the lookup key.  */
  size_t val;     /* Value currently held at that position.  */
};
83 static size_t
84 sparse_hash_ (void const *x, size_t table_size)
86 struct sparse_ent_ const *ent = x;
87 return ent->index % table_size;
90 static bool
91 sparse_cmp_ (void const *x, void const *y)
93 struct sparse_ent_ const *ent1 = x;
94 struct sparse_ent_ const *ent2 = y;
95 return ent1->index == ent2->index;
98 typedef Hash_table sparse_map;
100 /* Initialize the structure for the sparse map,
101 when a best guess as to the number of entries
102 specified with SIZE_HINT. */
104 static sparse_map *
105 sparse_new (size_t size_hint)
107 return hash_initialize (size_hint, NULL, sparse_hash_, sparse_cmp_, free);
110 /* Swap the values for I and J. If a value is not already present
111 then assume it's equal to the index. Update the value for
112 index I in array V. */
114 static void
115 sparse_swap (sparse_map *sv, size_t* v, size_t i, size_t j)
117 struct sparse_ent_ *v1 = hash_delete (sv, &(struct sparse_ent_) {i,0});
118 struct sparse_ent_ *v2 = hash_delete (sv, &(struct sparse_ent_) {j,0});
120 /* FIXME: reduce the frequency of these mallocs. */
121 if (!v1)
123 v1 = xmalloc (sizeof *v1);
124 v1->index = v1->val = i;
126 if (!v2)
128 v2 = xmalloc (sizeof *v2);
129 v2->index = v2->val = j;
132 size_t t = v1->val;
133 v1->val = v2->val;
134 v2->val = t;
135 if (!hash_insert (sv, v1))
136 xalloc_die ();
137 if (!hash_insert (sv, v2))
138 xalloc_die ();
140 v[i] = v1->val;
143 static void
144 sparse_free (sparse_map *sv)
146 hash_free (sv);
150 /* From R, allocate and return a malloc'd array of the first H elements
151 of a random permutation of N elements. H must not exceed N.
152 Return NULL if H is zero. */
154 size_t *
155 randperm_new (struct randint_source *r, size_t h, size_t n)
157 size_t *v;
159 switch (h)
161 case 0:
162 v = NULL;
163 break;
165 case 1:
166 v = xmalloc (sizeof *v);
167 v[0] = randint_choose (r, n);
168 break;
170 default:
172 /* The algorithm is essentially the same in both
173 the sparse and non sparse case. In the sparse case we use
174 a hash to implement sparse storage for the set of n numbers
175 we're shuffling. When to use the sparse method was
176 determined with the help of this script:
178 #!/bin/sh
179 for n in $(seq 2 32); do
180 for h in $(seq 2 32); do
181 test $h -gt $n && continue
182 for s in o n; do
183 test $s = o && shuf=shuf || shuf=./shuf
184 num=$(env time -f "$s:${h},${n} = %e,%M" \
185 $shuf -i0-$((2**$n-2)) -n$((2**$h-2)) | wc -l)
186 test $num = $((2**$h-2)) || echo "$s:${h},${n} = failed" >&2
187 done
188 done
189 done
191 This showed that if sparseness = n/h, then:
193 sparseness = 128 => .125 mem used, and about same speed
194 sparseness = 64 => .25 mem used, but 1.5 times slower
195 sparseness = 32 => .5 mem used, but 2 times slower
197 Also the memory usage was only significant when n > 128Ki
199 bool sparse = (n >= (128 * 1024)) && (n / h >= 32);
201 size_t i;
202 sparse_map *sv;
204 if (sparse)
206 sv = sparse_new (h * 2);
207 if (sv == NULL)
208 xalloc_die ();
209 v = xnmalloc (h, sizeof *v);
211 else
213 sv = NULL; /* To placate GCC's -Wuninitialized. */
214 v = xnmalloc (n, sizeof *v);
215 for (i = 0; i < n; i++)
216 v[i] = i;
219 for (i = 0; i < h; i++)
221 size_t j = i + randint_choose (r, n - i);
222 if (sparse)
223 sparse_swap (sv, v, i, j);
224 else
225 swap (v, i, j);
228 if (sparse)
229 sparse_free (sv);
230 else
231 v = xnrealloc (v, h, sizeof *v);
233 break;
236 return v;