From c2ab39fcc67cea025b19fa7a16bc13bb667d8d96 Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Sun, 8 Jan 2012 17:46:04 +0100 Subject: [PATCH] Probability pattern ditionary: Hash spatials only when adding their patterns This way, most important spatials will prevail over obscure ones in the hash. --- patternplay/patternplay.c | 2 +- patternprob.c | 20 ++++++++++++++++++++ patternscan/patternscan.c | 2 +- patternsp.c | 32 +++++++++++++++++++++----------- patternsp.h | 13 +++++++++++-- 5 files changed, 54 insertions(+), 15 deletions(-) diff --git a/patternplay/patternplay.c b/patternplay/patternplay.c index 93f434c..4d5f7db 100644 --- a/patternplay/patternplay.c +++ b/patternplay/patternplay.c @@ -104,7 +104,7 @@ patternplay_state_init(char *arg) pp->debug_level = debug_level; pp->pc = DEFAULT_PATTERN_CONFIG; - pp->pc.spat_dict = spatial_dict_init(false); + pp->pc.spat_dict = spatial_dict_init(false, false); memcpy(&pp->ps, PATTERN_SPEC_MATCH_DEFAULT, sizeof(pattern_spec)); if (arg) { diff --git a/patternprob.c b/patternprob.c index 48212d6..09de23e 100644 --- a/patternprob.c +++ b/patternprob.c @@ -34,6 +34,9 @@ pattern_pdict_init(char *filename, struct pattern_config *pc) dict->pc = pc; dict->table = calloc2(pc->spat_dict->nspatials + 1, sizeof(*dict->table)); + char *sphcachehit = malloc(pc->spat_dict->nspatials); + hash_t (*sphcache)[PTH__ROTATIONS] = malloc(pc->spat_dict->nspatials * sizeof(sphcache[0])); + int i = 0; char sbuf[1024]; while (fgets(sbuf, sizeof(sbuf), f)) { @@ -55,9 +58,26 @@ pattern_pdict_init(char *filename, struct pattern_config *pc) uint32_t spi = pattern2spatial(dict, &pb->p); pb->next = dict->table[spi]; dict->table[spi] = pb; + + /* We rehash spatials in the order of loaded patterns. This way + * we make sure that the most popular patterns will be hashed + * last and therefore take priority. */ + if (!sphcachehit[spi]) { + sphcachehit[spi] = 1; + for (int r = 0; r < PTH__ROTATIONS; r++) + sphcache[spi][r] = spatial_hash(r, &pc->spat_dict->spatials[spi]); + } + for (int r = 0; r < PTH__ROTATIONS; r++) + spatial_dict_addh(pc->spat_dict, sphcache[spi][r], spi); + i++; } + free(sphcache); + free(sphcachehit); + if (DEBUGL(3)) + spatial_dict_hashstats(pc->spat_dict); + fclose(f); if (DEBUGL(1)) fprintf(stderr, "Loaded %d pattern-probability pairs.\n", i); diff --git a/patternscan/patternscan.c b/patternscan/patternscan.c index f240ec7..fffe8d5 100644 --- a/patternscan/patternscan.c +++ b/patternscan/patternscan.c @@ -225,7 +225,7 @@ patternscan_state_init(char *arg) } } for (int i = 0; i < FEAT_MAX; i++) if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL)) ps->ps[i] = 0; - ps->pc.spat_dict = spatial_dict_init(ps->gen_spat_dict); + ps->pc.spat_dict = spatial_dict_init(ps->gen_spat_dict, true); ps->loaded_spatials = ps->pc.spat_dict->nspatials; return ps; diff --git a/patternsp.c b/patternsp.c index faa41a3..80e3971 100644 --- a/patternsp.c +++ b/patternsp.c @@ -228,7 +228,7 @@ spatial_dict_addc(struct spatial_dict *dict, struct spatial *s) return dict->nspatials++; } -static bool +bool spatial_dict_addh(struct spatial_dict *dict, hash_t hash, unsigned int id) { if (dict->hash[hash] && dict->hash[hash] != id) @@ -246,7 +246,7 @@ spatial_dict_addh(struct spatial_dict *dict, hash_t hash, unsigned int id) * HASH...: space-separated 18bit hash-table indices for the pattern */ static void -spatial_dict_read(struct spatial_dict *dict, char *buf) +spatial_dict_read(struct spatial_dict *dict, char *buf, bool hash) { /* XXX: We trust the data. Bad data will crash us. */ char *bufp = buf; @@ -277,8 +277,9 @@ spatial_dict_read(struct spatial_dict *dict, char *buf) assert(id == index); /* Add to specified hash places. */ - for (int r = 0; r < PTH__ROTATIONS; r++) - spatial_dict_addh(dict, spatial_hash(r, &s), id); + if (hash) + for (int r = 0; r < PTH__ROTATIONS; r++) + spatial_dict_addh(dict, spatial_hash(r, &s), id); } void @@ -305,16 +306,25 @@ spatial_write(struct spatial_dict *dict, struct spatial *s, int id, FILE *f) } static void -spatial_dict_load(struct spatial_dict *dict, FILE *f) +spatial_dict_load(struct spatial_dict *dict, FILE *f, bool hash) { char buf[1024]; while (fgets(buf, sizeof(buf), f)) { if (buf[0] == '#') continue; - spatial_dict_read(dict, buf); + spatial_dict_read(dict, buf, hash); } - if (DEBUGL(1)) - fprintf(stderr, "Loaded spatial dictionary of %d patterns (hash: %d coll., %d effective, %.2f%% fill rate).\n", - dict->nspatials, dict->collisions, dict->collisions / PTH__ROTATIONS, + if (DEBUGL(1)) { + fprintf(stderr, "Loaded spatial dictionary of %d patterns.\n", dict->nspatials); + if (hash) + spatial_dict_hashstats(dict); + } +} + +void +spatial_dict_hashstats(struct spatial_dict *dict) +{ + fprintf(stderr, "\t(Spatial dictionary hash: %d coll., %d effective - still inflated, %.2f%% fill rate).\n", + dict->collisions, dict->collisions / PTH__ROTATIONS, (double) dict->nspatials * 100 / (sizeof(dict->hash) / sizeof(dict->hash[0]))); } @@ -341,7 +351,7 @@ static struct spatial_dict *cached_dict; const char *spatial_dict_filename = "patterns.spat"; struct spatial_dict * -spatial_dict_init(bool will_append) +spatial_dict_init(bool will_append, bool hash) { if (cached_dict && !will_append) return cached_dict; @@ -361,7 +371,7 @@ spatial_dict_init(bool will_append) spatial_dict_addc(dict, &dummy); if (f) { - spatial_dict_load(dict, f); + spatial_dict_load(dict, f, hash); fclose(f); f = NULL; } else { assert(will_append); diff --git a/patternsp.h b/patternsp.h index 4ebf427..944d8d9 100644 --- a/patternsp.h +++ b/patternsp.h @@ -109,8 +109,10 @@ struct spatial_dict { /* Initializes spatial dictionary, pre-loading existing records from * default filename if exists. If will_append is true, it will not - * complain about non-existing file and initialize the dictionary anyway. */ -struct spatial_dict *spatial_dict_init(bool will_append); + * complain about non-existing file and initialize the dictionary anyway. + * If hash is true, loaded spatials will be added to the hashtable; + * use false if this is to be done later (e.g. by patternprob). */ +struct spatial_dict *spatial_dict_init(bool will_append, bool hash); /* Lookup specified spatial pattern in the dictionary; return index * of the pattern. If the pattern is not found, 0 will be returned. */ @@ -121,6 +123,13 @@ static unsigned int spatial_dict_get(struct spatial_dict *dict, int dist, hash_t * file automatically. */ int spatial_dict_put(struct spatial_dict *dict, struct spatial *s, hash_t); +/* Readds given rotation of given pattern to the hash. This is useful only + * if you want to tweak hash priority of various patterns. */ +bool spatial_dict_addh(struct spatial_dict *dict, hash_t hash, unsigned int id); + +/* Print stats about the hash to stderr. Companion to spatial_dict_addh(). */ +void spatial_dict_hashstats(struct spatial_dict *dict); + /* Spatial dictionary file manipulation. */ -- 2.11.4.GIT