1 /* Generate random permutations.
3 Copyright (C) 2006-2024 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /* Written by Paul Eggert. */
28 #include "attribute.h"
29 #include "count-leading-zeros.h"
33 /* Return the floor of the log base 2 of N. If N is zero, return -1. */
35 ATTRIBUTE_CONST
static int
/* Compile-time check that size_t is no wider than unsigned long long,
   so the last (widest) arm of the selection below can always take N.  */
38 static_assert (SIZE_WIDTH
<= ULLONG_WIDTH
);
/* The width comparisons are compile-time constants: each arm pairs a
   type width (UINT_WIDTH, ULONG_WIDTH, ULLONG_WIDTH) with the
   count_leading_zeros variant of the same suffix, choosing the first
   type that is at least as wide as size_t.  The result is that width,
   minus one, minus the number of leading zero bits in N.
   NOTE(review): the function header and the arm that handles N == 0
   are not visible in this excerpt -- confirm against the full file.  */
40 : SIZE_WIDTH
<= UINT_WIDTH
41 ? UINT_WIDTH
- 1 - count_leading_zeros (n
)
42 : SIZE_WIDTH
<= ULONG_WIDTH
43 ? ULONG_WIDTH
- 1 - count_leading_zeros_l (n
)
44 : ULLONG_WIDTH
- 1 - count_leading_zeros_ll (n
));
47 /* Return an upper bound on the number of random bytes needed to
48 generate the first H elements of a random permutation of N
49 elements. H must not exceed N. */
/* H is the number of leading permutation elements wanted and N is the
   size of the set being permuted.  The bound is computed as
   ceil ((floor_lg (N) + 1) * H / CHAR_BIT).
   NOTE(review): the return type and the return statement are not
   visible in this excerpt.  */
52 randperm_bound (size_t h
, size_t n
)
54 /* Upper bound on number of bits needed to generate the first number
55 of the permutation. */
56 uintmax_t lg_n
= floor_lg (n
) + 1;
58 /* Upper bound on number of bits needed to generate the first H elements.
   NOTE(review): the product is formed in uintmax_t and nothing visible
   here guards against wraparound for very large H -- confirm that
   callers cannot reach that range.  */
59 uintmax_t ar
= lg_n
* h
;
61 /* Convert the bit count to a byte count, rounding up. */
62 size_t bound
= (ar
+ CHAR_BIT
- 1) / CHAR_BIT
;
67 /* Swap elements I and J in array V. */
/* V is the array to modify; I and J are the two positions whose
   elements are exchanged.
   NOTE(review): the return type and body are not visible in this
   excerpt.  */
70 swap (size_t *v
, size_t i
, size_t j
)
77 /* Structures and functions for a sparse_map abstract data type that's
78 used to effectively swap elements I and J in array V like swap(),
79 but in a more memory efficient manner (when the number of permutations
80 performed is significantly less than the size of the input). */
/* Hash function handed to hash_initialize by sparse_new.  X points to
   a struct sparse_ent_; the result is that entry's index field reduced
   modulo TABLE_SIZE.  The val field plays no part in hashing.  */
89 sparse_hash_ (void const *x
, size_t table_size
)
91 struct sparse_ent_
const *ent
= x
;
92 return ent
->index
% table_size
;
/* Comparison function handed to hash_initialize by sparse_new.  X and
   Y point to struct sparse_ent_ objects; the result is nonzero exactly
   when their index fields are equal.  The val field is not compared,
   so a key with any val matches the stored entry for that index.  */
96 sparse_cmp_ (void const *x
, void const *y
)
98 struct sparse_ent_
const *ent1
= x
;
99 struct sparse_ent_
const *ent2
= y
;
100 return ent1
->index
== ent2
->index
;
/* A sparse_map is simply a Hash_table; the functions in this file
   store struct sparse_ent_ entries in it, keyed by their index field.  */
103 typedef Hash_table sparse_map
;
105 /* Initialize the structure for the sparse map,
106 with a best guess as to the number of entries
107 specified by SIZE_HINT. */
/* Thin wrapper around hash_initialize: SIZE_HINT is forwarded as the
   first argument, nullptr as the second, sparse_hash_ and sparse_cmp_
   as the hashing and comparison callbacks, and free as the final
   argument (presumably the per-entry release function -- confirm
   against the hash module's documentation).  The result of
   hash_initialize is returned as-is; no failure check is made here.
   NOTE(review): the return type is not visible in this excerpt.  */
110 sparse_new (size_t size_hint
)
112 return hash_initialize (size_hint
, nullptr, sparse_hash_
, sparse_cmp_
, free
);
115 /* Swap the values for I and J. If a value is not already present
116 then assume it's equal to the index. Update the value for
117 index I in array V. */
120 sparse_swap (sparse_map
*sv
, size_t *v
, size_t i
, size_t j
)
/* Take the entries for I and J out of the map.  The lookup keys are
   temporary compound literals; their val of 0 is irrelevant because
   sparse_cmp_ compares the index field only.  */
122 struct sparse_ent_
*v1
= hash_remove (sv
, &(struct sparse_ent_
) {i
,0});
123 struct sparse_ent_
*v2
= hash_remove (sv
, &(struct sparse_ent_
) {j
,0});
125 /* FIXME: reduce the frequency of these mallocs. */
/* New entry for I whose value starts out equal to its index.
   NOTE(review): presumably this runs only when hash_remove found no
   entry for I; the guarding condition is not visible in this excerpt.  */
128 v1
= xmalloc (sizeof *v1
);
129 v1
->index
= v1
->val
= i
;
/* Likewise for J (same caveat about the guard that is not visible).  */
133 v2
= xmalloc (sizeof *v2
);
134 v2
->index
= v2
->val
= j
;
/* Put both entries back into the map; a false result from hash_insert
   is tested for each.
   NOTE(review): the exchange of the two val fields, the action taken
   when an insert fails, and the store into V are not visible in this
   excerpt.  */
140 if (!hash_insert (sv
, v1
))
142 if (!hash_insert (sv
, v2
))
/* NOTE(review): only the parameter list is visible in this excerpt.
   Presumably this releases the map SV created by sparse_new -- confirm
   against the full file.  */
149 sparse_free (sparse_map
*sv
)
155 /* From R, allocate and return a malloc'd array of the first H elements
156 of a random permutation of N elements. H must not exceed N.
157 Return nullptr if H is zero. */
160 randperm_new (struct randint_source
*r
, size_t h
, size_t n
)
/* One-element result: a single slot filled with one draw from
   randint_choose (R, N).
   NOTE(review): the dispatch on H that selects this path is not
   visible in this excerpt.  */
171 v
= xmalloc (sizeof *v
);
172 v
[0] = randint_choose (r
, n
);
177 /* The algorithm is essentially the same in both
178 the sparse and non sparse case. In the sparse case we use
179 a hash to implement sparse storage for the set of n numbers
180 we're shuffling. When to use the sparse method was
181 determined with the help of this script:
184 for n in $(seq 2 32); do
185 for h in $(seq 2 32); do
186 test $h -gt $n && continue
188 test $s = o && shuf=shuf || shuf=./shuf
189 num=$(env time -f "$s:${h},${n} = %e,%M" \
190 $shuf -i0-$((2**$n-2)) -n$((2**$h-2)) | wc -l)
191 test $num = $((2**$h-2)) || echo "$s:${h},${n} = failed" >&2
196 This showed that if sparseness = n/h, then:
198 sparseness = 128 => .125 mem used, and about same speed
199 sparseness = 64 => .25 mem used, but 1.5 times slower
200 sparseness = 32 => .5 mem used, but 2 times slower
202 Also the memory usage was only significant when n > 128Ki
204 bool sparse
= (n
>= (128 * 1024)) && (n
/ h
>= 32);
211 sv
= sparse_new (h
* 2);
214 v
= xnmalloc (h
, sizeof *v
);
218 sv
= nullptr; /* To placate GCC's -Wuninitialized. */
/* Dense storage: one slot for each of the N candidate values.  */
219 v
= xnmalloc (n
, sizeof *v
);
/* Walk all N slots of the dense array.
   NOTE(review): the loop body is not visible in this excerpt;
   presumably it stores I into V[I].  */
220 for (i
= 0; i
< n
; i
++)
/* Fix the first H positions in turn: for position I, J is I plus a
   draw from randint_choose (R, N - I), so J never precedes I.  */
224 for (i
= 0; i
< h
; i
++)
226 size_t j
= i
+ randint_choose (r
, n
- i
);
/* Exchange I and J through the sparse map.
   NOTE(review): the branch that chooses between this call and the
   dense swap is not visible in this excerpt.  */
228 sparse_swap (sv
, v
, i
, j
);
/* Resize V to exactly H elements.
   NOTE(review): the condition under which this runs is not visible;
   presumably it is the dense case, where V was allocated with N
   slots.  */
236 v
= xnrealloc (v
, h
, sizeof *v
);