GCC doesn't trace switch (foo & 7) completely, so add a default: abort()
[netbsd-mini2440.git] / usr.bin / nbperf / nbperf-bdz.c
blobbba97fd1240c763462ca2623625077e70ebc50a8
1 /* $NetBSD: nbperf-bdz.c,v 1.1 2009/08/15 16:21:05 joerg Exp $ */
2 /*-
3 * Copyright (c) 2009 The NetBSD Foundation, Inc.
4 * All rights reserved.
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Joerg Sonnenberger.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
34 #include <sys/cdefs.h>
35 __RCSID("$NetBSD: nbperf-bdz.c,v 1.1 2009/08/15 16:21:05 joerg Exp $");
37 #include <err.h>
38 #include <inttypes.h>
39 #include <stdlib.h>
40 #include <stdio.h>
41 #include <string.h>
43 #include "nbperf.h"
46 * A full description of the algorithm can be found in:
47 * "Simple and Space-Efficient Minimal Perfect Hash Functions"
48 * by Botelho, Pagh and Ziviani, proceeedings of WADS 2007.
52 * The algorithm is based on random, acyclic 3-graphs.
54 * Each edge in the represents a key. The vertices are the reminder of
55 * the hash function mod n. n = cm with c > 1.23. This ensures that
56 * can be found with a very high probality.
58 * An acyclic graph has an edge order, where at least one vertex of
59 * each edge hasn't been seen before. It is declares the first unvisited
60 * vertex as authoritive for the edge and assigns a 2bit value to unvisited
61 * vertices, so that the sum of all vertices of the edge modulo 4 is
62 * the index of the authoritive vertex.
65 #include "graph3.h"
67 struct state {
68 struct graph3 graph;
69 uint32_t *visited;
70 uint32_t *holes64k;
71 uint16_t *holes256;
72 uint8_t *holes256_64;
73 uint8_t *holes256_128;
74 uint8_t *holes256_192;
75 uint8_t *g;
76 uint32_t *result_map;
79 static void
80 assign_nodes(struct state *state)
82 struct edge3 *e;
83 size_t i, j;
84 uint32_t t, r, holes;
86 for (i = 0; i < state->graph.v; ++i)
87 state->g[i] = 3;
89 for (i = 0; i < state->graph.e; ++i) {
90 j = state->graph.output_order[i];
91 e = &state->graph.edges[j];
92 if (!state->visited[e->left]) {
93 r = 0;
94 t = e->left;
95 } else if (!state->visited[e->middle]) {
96 r = 1;
97 t = e->middle;
98 } else {
99 if (state->visited[e->right])
100 abort();
101 r = 2;
102 t = e->right;
105 state->visited[t] = 2 + j;
106 if (state->visited[e->left] == 0)
107 state->visited[e->left] = 1;
108 if (state->visited[e->middle] == 0)
109 state->visited[e->middle] = 1;
110 if (state->visited[e->right] == 0)
111 state->visited[e->right] = 1;
113 state->g[t] = (9 + r - state->g[e->left] - state->g[e->middle]
114 - state->g[e->right]) % 3;
117 holes = 0;
118 for (i = 0; i < state->graph.v; ++i) {
119 if (i % 65536 == 0)
120 state->holes64k[i >> 16] = holes;
122 if (i % 256 == 0)
123 state->holes256[i >> 8] = holes - state->holes64k[i >> 16];
125 if (i % 256 == 64)
126 state->holes256_64[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
128 if (i % 256 == 128)
129 state->holes256_128[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
131 if (i % 256 == 192)
132 state->holes256_192[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
134 if (state->visited[i] > 1) {
135 j = state->visited[i] - 2;
136 state->result_map[j] = i - holes;
139 if (state->g[i] == 3)
140 ++holes;
143 if (i % 65536 != 0)
144 state->holes64k[(i >> 16) + 1] = holes;
146 if (i % 256 != 0)
147 state->holes256[(i >> 8) + 1] = holes - state->holes64k[((i >> 8) + 1) >> 8];
149 if (i % 256 != 64)
150 state->holes256_64[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
152 if (i % 256 != 128)
153 state->holes256_128[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
155 if (i % 256 != 192)
156 state->holes256_192[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
159 static void
160 print_hash(struct nbperf *nbperf, struct state *state)
162 size_t i, j;
163 uint32_t sum;
165 fprintf(nbperf->output, "#include <stdlib.h>\n");
166 fprintf(nbperf->output, "#include <strings.h>\n\n");
168 fprintf(nbperf->output, "%suint32_t\n",
169 nbperf->static_hash ? "static " : "");
170 fprintf(nbperf->output,
171 "%s(const void * __restrict key, size_t keylen)\n",
172 nbperf->hash_name);
173 fprintf(nbperf->output, "{\n");
174 fprintf(nbperf->output,
175 "\tstatic const uint32_t g[%" PRId32 "] = {\n",
176 (state->graph.v + 15) / 16);
177 for (i = 0; i < state->graph.v; i += 16) {
178 for (j = 0, sum = 0; j < 16; ++j)
179 sum |= (uint32_t)state->g[i + j] << (2 * j);
181 fprintf(nbperf->output, "%s0x%08" PRIx32 "ULL,%s",
182 (i / 16 % 4 == 0 ? "\t " : " "),
183 sum,
184 (i / 16 % 4 == 3 ? "\n" : ""));
186 fprintf(nbperf->output, "%s\t};\n", (i / 16 % 4 ? "\n" : ""));
188 fprintf(nbperf->output,
189 "\tstatic const uint32_t holes64k[%" PRId32 "] = {\n",
190 (state->graph.v + 65535) / 65536);
191 for (i = 0; i < state->graph.v; i += 65536)
192 fprintf(nbperf->output, "%s0x%08" PRIx32 ",%s",
193 (i / 65536 % 4 == 0 ? "\t " : " "),
194 state->holes64k[i >> 16],
195 (i / 65536 % 4 == 3 ? "\n" : ""));
196 fprintf(nbperf->output, "%s\t};\n", (i / 65536 % 4 ? "\n" : ""));
198 fprintf(nbperf->output,
199 "\tstatic const uint16_t holes256[%" PRId32 "] = {\n",
200 (state->graph.v + 255) / 256);
201 for (i = 0; i < state->graph.v; i += 256)
202 fprintf(nbperf->output, "%s0x%04" PRIx32 ",%s",
203 (i / 256 % 4 == 0 ? "\t " : " "),
204 state->holes256[i >> 8],
205 (i / 256 % 4 == 3 ? "\n" : ""));
206 fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
208 fprintf(nbperf->output,
209 "\tstatic const uint8_t holes256_64[%" PRId32 "] = {\n",
210 (state->graph.v + 255) / 256);
211 for (i = 64; i < state->graph.v; i += 256)
212 fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
213 (i / 256 % 4 == 0 ? "\t " : " "),
214 state->holes256_64[i >> 8],
215 (i / 256 % 4 == 3 ? "\n" : ""));
216 fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
218 fprintf(nbperf->output,
219 "\tstatic const uint8_t holes256_128[%" PRId32 "] = {\n",
220 (state->graph.v + 255) / 256);
221 for (i = 128; i < state->graph.v; i += 256)
222 fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
223 (i / 256 % 4 == 0 ? "\t " : " "),
224 state->holes256_128[i >> 8],
225 (i / 256 % 4 == 3 ? "\n" : ""));
226 fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
228 fprintf(nbperf->output,
229 "\tstatic const uint8_t holes256_192[%" PRId32 "] = {\n",
230 (state->graph.v + 255) / 256);
231 for (i = 192; i < state->graph.v; i += 256)
232 fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
233 (i / 256 % 4 == 0 ? "\t " : " "),
234 state->holes256_192[i >> 8],
235 (i / 256 % 4 == 3 ? "\n" : ""));
236 fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
238 fprintf(nbperf->output, "\tuint32_t h[%zu];\n\n", nbperf->hash_size);
239 fprintf(nbperf->output, "\tuint32_t m;\n");
240 fprintf(nbperf->output, "\tuint32_t a1, a2, b1, b2, c1, c2, idx, idx2;\n\n");
242 (*nbperf->print_hash)(nbperf, "\t", "key", "keylen", "h");
244 fprintf(nbperf->output, "\n\th[0] = h[0] %% %" PRIu32 ";\n", state->graph.v);
245 fprintf(nbperf->output, "\th[1] = h[1] %% %" PRIu32 ";\n", state->graph.v);
246 fprintf(nbperf->output, "\th[2] = h[2] %% %" PRIu32 ";\n", state->graph.v);
248 fprintf(nbperf->output, "\n\ta1 = h[0] >> 4;\n");
249 fprintf(nbperf->output, "\ta2 = 2 * (h[0] & 15);\n");
250 fprintf(nbperf->output, "\tb1 = h[1] >> 4;\n");
251 fprintf(nbperf->output, "\tb2 = 2 * (h[1] & 15);\n");
252 fprintf(nbperf->output, "\tc1 = h[2] >> 4;\n");
253 fprintf(nbperf->output, "\tc2 = 2 * (h[2] & 15);\n");
255 fprintf(nbperf->output,
256 "\tidx = h[(((g[a1] >> a2) & 3) + ((g[b1] >> b2) & 3) +\n"
257 "\t ((g[c1] >> c2) & 3)) %% 3];\n\n");
259 fprintf(nbperf->output,
260 "\tswitch ((idx >> 5) & 7) {\n"
261 "\tcase 0:\n"
262 "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8];\n"
263 "\t\tbreak;\n"
264 "\tcase 1: case 2:\n"
265 "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
266 "\t\t - holes256_64[idx >> 8];\n"
267 "\t\tbreak;\n"
268 "\tcase 3: case 4:\n"
269 "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
270 "\t\t - holes256_128[idx >> 8];\n"
271 "\t\tbreak;\n"
272 "\tcase 5: case 6:\n"
273 "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
274 "\t\t - holes256_192[idx >> 8];\n"
275 "\t\tbreak;\n"
276 "\tcase 7:\n"
277 "\t\tidx2 = idx - holes64k[(idx + 32) >> 16] -\n"
278 "\t\t holes256[(idx + 32) >> 8];\n"
279 "\t\tbreak;\n"
280 "\tdefault:\n"
281 "\t\tabort();\n"
282 "\t}\n"
283 "\tswitch ((idx >> 4) & 3) {\n"
284 "\tcase 1:\n"
285 "\t\tm = (g[(idx >> 4) - 1] & (g[(idx >> 4) - 1] >> 1) & 0x55555555U);\n"
286 "\t\tidx2 -= popcount32(m);\n"
287 "\tcase 0:\n"
288 "\t\tm = (g[idx >> 4] & (g[idx >> 4] >> 1) & 0x55555555U);\n"
289 "\t\tm &= ((2U << (2 * (idx & 15))) - 1);\n"
290 "\t\tidx2 -= popcount32(m);\n"
291 "\t\tbreak;\n"
292 "\tcase 2:\n"
293 "\t\tm = (g[(idx >> 4) + 1] & (g[(idx >> 4) + 1] >> 1) & 0x55555555U);\n"
294 "\t\tidx2 += popcount32(m);\n"
295 "\tcase 3:\n"
296 "\t\tm = (g[idx >> 4] & (g[idx >> 4] >> 1) & 0x55555555U);\n"
297 "\t\tm &= ~((2U << (2 * (idx & 15))) - 1);\n"
298 "\t\tidx2 += popcount32(m);\n"
299 "\t\tbreak;\n"
300 "\t}\n\n");
302 fprintf(nbperf->output,
303 "\treturn idx2;\n");
304 fprintf(nbperf->output, "}\n");
306 if (nbperf->map_output != NULL) {
307 for (i = 0; i < state->graph.e; ++i)
308 fprintf(nbperf->map_output, "%" PRIu32 "\n",
309 state->result_map[i]);
314 bdz_compute(struct nbperf *nbperf)
316 struct state state;
317 int retval = -1;
318 uint32_t v, e;
320 if (nbperf->c == 0)
321 nbperf->c = 1.24;
322 if (nbperf->c < 1.24)
323 errx(1, "The argument for option -c must be at least 1.24");
324 if (nbperf->hash_size < 3)
325 errx(1, "The hash function must generate at least 3 values");
327 (*nbperf->seed_hash)(nbperf);
328 e = nbperf->n;
329 v = nbperf->c * nbperf->n;
330 if (1.24 * nbperf->n > v)
331 ++v;
332 if (v < 10)
333 v = 10;
335 graph3_setup(&state.graph, v, e);
337 state.holes64k = calloc(sizeof(uint32_t), (v + 65535) / 65536 + 1);
338 state.holes256 = calloc(sizeof(uint16_t), (v + 255) / 256 + 1);
339 state.holes256_64 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
340 state.holes256_128 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
341 state.holes256_192 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
342 state.g = calloc(sizeof(uint32_t), v);
343 state.visited = calloc(sizeof(uint32_t), v);
344 state.result_map = calloc(sizeof(uint32_t), e);
346 if (state.holes64k == NULL || state.holes256 == NULL ||
347 state.holes256_64 == NULL || state.holes256_128 == NULL ||
348 state.holes256_192 == NULL || state.g == NULL ||
349 state.visited == NULL || state.result_map == NULL)
350 err(1, "malloc failed");
352 if (graph3_hash(nbperf, &state.graph))
353 goto failed;
354 if (graph3_output_order(&state.graph))
355 goto failed;
356 assign_nodes(&state);
357 print_hash(nbperf, &state);
359 retval = 0;
361 failed:
362 graph3_free(&state.graph);
363 free(state.visited);
364 free(state.g);
365 free(state.holes64k);
366 free(state.holes256);
367 free(state.holes256_64);
368 free(state.holes256_128);
369 free(state.holes256_192);
370 free(state.result_map);
371 return retval;