From e50f1db8c3b82d71afcf889597cc87c5859e5e9f Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Sat, 3 Jul 2010 03:49:51 +0200 Subject: [PATCH] Probdist: Convert all values to newly introduced fixp_t We have unending problems with accuracy; the fixed-point type should fit it all, and perhaps also be faster. --- board.c | 7 +++--- fixp.h | 23 +++++++++++++++++++ playout/elo.c | 71 ++++++++++++++++++++++++++++++++++++----------------------- probdist.c | 17 +++++++------- probdist.h | 18 +++++++-------- uct/walk.c | 3 ++- 6 files changed, 88 insertions(+), 51 deletions(-) create mode 100644 fixp.h diff --git a/board.c b/board.c index cb95d22..084b0b9 100644 --- a/board.c +++ b/board.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -291,8 +292,8 @@ board_clear(struct board *board) #ifdef BOARD_GAMMA board->prob[0].b = board->prob[1].b = board; foreach_point(board) { - probdist_set(&board->prob[0], c, (board_at(board, c) == S_NONE) * 1.0f); - probdist_set(&board->prob[1], c, (board_at(board, c) == S_NONE) * 1.0f); + probdist_set(&board->prob[0], c, double_to_fixp((board_at(board, c) == S_NONE) * 1.0f)); + probdist_set(&board->prob[1], c, double_to_fixp((board_at(board, c) == S_NONE) * 1.0f)); } foreach_point_end; #endif } @@ -428,7 +429,7 @@ board_gamma_update(struct board *board, coord_t coord, enum stone color) value *= board->gamma->gamma[FEAT_AESCAPE][0]; if (!trait_at(board, coord, color).safe) value *= board->gamma->gamma[FEAT_SELFATARI][1 + board->precise_selfatari]; - probdist_set(&board->prob[color - 1], coord, value); + probdist_set(&board->prob[color - 1], coord, double_to_fixp(value)); #endif } diff --git a/fixp.h b/fixp.h new file mode 100644 index 0000000..0e27576 --- /dev/null +++ b/fixp.h @@ -0,0 +1,23 @@ +#ifndef ZZGO_FIXP_H +#define ZZGO_FIXP_H + +/* Tools for counting fixed-point numbers. */ + +/* We implement a simple fixed-point number type, with fixed number of + * fractional binary digits after the radix point. */ + +#include + +typedef uint_fast32_t fixp_t; + +/* We should accomodate at least 0..131072 (17bits) in the whole number + * portion; assuming at least 32bit integer, that leaves us with 15-bit + * fractional part. Thankfully, we need only unsigned values. */ +#define FIXP_BITS 15 + +#define FIXP_SCALE (1< %s %f (E %f)\n", f, coord2sstr(m.coord, b), probdist_one(pd, m.coord), pd->items[f]); + probdist_set(pd, m.coord, double_to_fixp(g)); + //fprintf(stderr, "<%d> %s %f (E %f)\n", f, coord2sstr(m.coord, b), fixp_to_double(probdist_one(pd, m.coord)), pd->items[f]); } return moves; @@ -119,12 +120,12 @@ struct lprobdist { int n; #define LPD_MAX 8 coord_t coords[LPD_MAX]; - double items[LPD_MAX]; - double total; + fixp_t items[LPD_MAX]; + fixp_t total; /* Backups of original totals for restoring. */ - double btotal; - double browtotals_v[10]; + fixp_t btotal; + fixp_t browtotals_v[10]; int browtotals_i[10]; int browtotals_n; }; @@ -135,8 +136,9 @@ static void elo_check_probdist(struct playout_policy *p, struct board *b, enum stone to_play, struct probdist *pd, int *ignores, struct lprobdist *lpd, coord_t lc) { #if 0 +#define PROBDIST_EPSILON double_to_fixp(0.01) struct elo_policy *pp = p->data; - if (pd->total < PROBDIST_EPSILON) + if (pd->total == 0) return; /* Compare to the manually created distribution. */ @@ -147,20 +149,30 @@ elo_check_probdist(struct playout_policy *p, struct board *b, enum stone to_play for (int i = 0; i < b->flen; i++) { coord_t c = b->f[i]; if (is_pass(c)) continue; - // XXX: Hardcoded ignores[] structure - if (ignores[0] == c) continue; - double val = pd->items[c]; + if (c == b->ko.coord) continue; + fixp_t val = pd->items[c]; if (!is_pass(lc) && coord_is_8adjecent(lc, c, b)) for (int j = 0; j < lpd->n; j++) - if (lpd->coords[j] == c) + if (lpd->coords[j] == c) { val = lpd->items[j]; - if (fabs(pdx.items[c] - val) < PROBDIST_EPSILON) + probdist_mute(&pdx, c); + + } + if (abs(pdx.items[c] - val) < PROBDIST_EPSILON) continue; - printf("[%s %d] manual %f board %f ", coord2sstr(c, b), b->pat3[c], pdx.items[c], pd->items[c]); + printf("[%s %d] manual %f board %f (base %f) ", coord2sstr(c, b), b->pat3[c], fixp_to_double(pdx.items[c]), fixp_to_double(val), fixp_to_double(pd->items[c])); board_gamma_update(b, c, to_play); - printf("plainboard %f\n", pd->items[c]); + printf("plainboard %f\n", fixp_to_double(pd->items[c])); + assert(0); + } + for (int r = 0; r < board_size(b); r++) { + if (abs(pdx.rowtotals[r] - pd->rowtotals[r]) < PROBDIST_EPSILON) + continue; + fprintf(stderr, "row %d: manual %f board %f\n", r, fixp_to_double(pdx.rowtotals[r]), fixp_to_double(pd->rowtotals[r])); assert(0); } + assert(abs(pdx.total - pd->total) < PROBDIST_EPSILON); +#undef PROBDIST_EPSILON #endif } @@ -180,7 +192,7 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play pp->callback(pp->callback_data, b, to_play, pd); if (PLDEBUGL(5)) { - fprintf(stderr, "pd total pre %lf lpd %lf\n", pd->total, lpd.total); + fprintf(stderr, "pd total pre %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total)); } #define ignore_move(c_) do { \ @@ -197,7 +209,7 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play lpd.browtotals_v[lpd.browtotals_n++] = pd->rowtotals[rowi]; \ probdist_mute(pd, c_); \ if (PLDEBUGL(6)) \ - fprintf(stderr, "ignored move %s(%lf) => tot pd %lf lpd %lf\n", coord2sstr(c_, pd->b), pd->items[c_], pd->total, lpd.total); \ + fprintf(stderr, "ignored move %s(%f) => tot pd %f lpd %f\n", coord2sstr(c_, pd->b), fixp_to_double(pd->items[c_]), fixp_to_double(pd->total), fixp_to_double(lpd.total)); \ } while (0) /* Make sure ko-prohibited move does not get picked. */ @@ -213,7 +225,7 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play continue; // already ignored ignore_move(c); - double val = probdist_one(pd, c) * b->gamma->gamma[FEAT_CONTIGUITY][1]; + fixp_t val = double_to_fixp(fixp_to_double(probdist_one(pd, c)) * b->gamma->gamma[FEAT_CONTIGUITY][1]); lpd.coords[lpd.n] = c; lpd.items[lpd.n++] = val; lpd.total += val; @@ -222,21 +234,21 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play ignores[ignores_n] = pass; if (PLDEBUGL(5)) - fprintf(stderr, "pd total post %lf lpd %lf\n", pd->total, lpd.total); + fprintf(stderr, "pd total post %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total)); /* Verify sanity, possibly. */ elo_check_probdist(p, b, to_play, pd, ignores, &lpd, b->last_move.coord); /* Pick a move. */ coord_t c = pass; - double stab = fast_frandom() * (lpd.total + pd->total); + fixp_t stab = fast_irandom(lpd.total + pd->total); if (PLDEBUGL(5)) - fprintf(stderr, "stab %lf / (%lf + %lf)\n", stab, lpd.total, pd->total); - if (stab < lpd.total - PROBDIST_EPSILON) { + fprintf(stderr, "stab %f / (%f + %f)\n", fixp_to_double(stab), fixp_to_double(lpd.total), fixp_to_double(pd->total)); + if (stab < lpd.total) { /* Local probdist. */ if (PLDEBUGL(6)) { /* Some debug prints. */ - double tot = 0; + fixp_t tot = 0; for (int i = 0; i < lpd.n; i++) { tot += lpd.items[i]; struct pattern p; @@ -247,7 +259,10 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play } pattern_match(&pp->choose.pc, pp->choose.ps, &p, b, &m); char s[256] = ""; pattern2str(s, &p); - fprintf(stderr, "coord %s <%lf> [tot %lf] %s (p3:%d)\n", coord2sstr(lpd.coords[i], b), lpd.items[i], tot, s, pattern3_by_spatial(pp->choose.pc.spat_dict, b->pat3[lpd.coords[i]])); + fprintf(stderr, "coord %s <%f> [tot %f] %s (p3:%d)\n", + coord2sstr(lpd.coords[i], b), fixp_to_double(lpd.items[i]), + fixp_to_double(tot), s, + pattern3_by_spatial(pp->choose.pc.spat_dict, b->pat3[lpd.coords[i]])); } } for (int i = 0; i < lpd.n; i++) { @@ -258,11 +273,11 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play stab -= lpd.items[i]; } if (is_pass(c)) { - fprintf(stderr, "elo: local overstab [%lf]\n", stab); + fprintf(stderr, "elo: local overstab [%f]\n", fixp_to_double(stab)); abort(); } - } else if (pd->total >= PROBDIST_EPSILON) { + } else if (pd->total > 0) { /* Global probdist. */ /* XXX: We re-stab inside. */ c = probdist_pick(pd, ignores); @@ -284,7 +299,7 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play pd->items[b->f[i]] = 0; board_gamma_update(b, b->f[i], to_play); } - assert(fabs(pd->total - lpd.btotal) < PROBDIST_EPSILON); + assert(pd->total == lpd.btotal); } else { pd->total = lpd.btotal; @@ -307,7 +322,7 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play elo_get_probdist(p, &pp->choose, b, to_play, &pd); if (pp->callback) pp->callback(pp->callback_data, b, to_play, &pd); - if (pd.total < PROBDIST_EPSILON) + if (pd.total == 0) return pass; int ignores[1] = { pass }; coord_t c = probdist_pick(&pd, ignores); @@ -333,7 +348,7 @@ playout_elo_assess(struct playout_policy *p, struct prior_map *map, int games) coord_t c = map->b->f[f]; if (!map->consider[c]) continue; - add_prior_value(map, c, probdist_one(&pd, c) / probdist_total(&pd), games); + add_prior_value(map, c, fixp_to_double(probdist_one(&pd, c)) / fixp_to_double(probdist_total(&pd)), games); } } diff --git a/probdist.c b/probdist.c index ba1cf9e..a611045 100644 --- a/probdist.c +++ b/probdist.c @@ -13,17 +13,16 @@ coord_t probdist_pick(struct probdist *restrict pd, coord_t *restrict ignore) { - double total = probdist_total(pd) - PROBDIST_EPSILON; - assert(total >= 0); - double stab = fast_frandom() * total; + fixp_t total = probdist_total(pd); + fixp_t stab = fast_irandom(total); if (DEBUGL(6)) - fprintf(stderr, "stab %f / %f\n", stab, total); + fprintf(stderr, "stab %f / %f\n", fixp_to_double(stab), fixp_to_double(total)); int r = 0; coord_t c = 0; - while (stab > pd->rowtotals[r] + PROBDIST_EPSILON) { + while (stab > pd->rowtotals[r]) { if (DEBUGL(6)) - fprintf(stderr, "[%s] skipping row %f (%f)\n", coord2sstr(c, pd->b), pd->rowtotals[r], stab); + fprintf(stderr, "[%s] skipping row %f (%f)\n", coord2sstr(c, pd->b), fixp_to_double(pd->rowtotals[r]), fixp_to_double(stab)); stab -= pd->rowtotals[r]; r++; assert(r < board_size(pd->b)); @@ -35,12 +34,12 @@ probdist_pick(struct probdist *restrict pd, coord_t *restrict ignore) for (; c < board_size2(pd->b); c++) { if (DEBUGL(6)) - fprintf(stderr, "[%s] %f (%f)\n", coord2sstr(c, pd->b), pd->items[c], stab); + fprintf(stderr, "[%s] %f (%f)\n", coord2sstr(c, pd->b), fixp_to_double(pd->items[c]), fixp_to_double(stab)); assert(is_pass(*ignore) || c <= *ignore); if (c == *ignore) { if (DEBUGL(6)) - fprintf(stderr, "ignored\n"); + fprintf(stderr, "\tignored\n"); ignore++; continue; } @@ -50,7 +49,7 @@ probdist_pick(struct probdist *restrict pd, coord_t *restrict ignore) stab -= pd->items[c]; } - fprintf(stderr, "overstab %f (total %f)\n", stab, total); + fprintf(stderr, "overstab %f (total %f)\n", fixp_to_double(stab), fixp_to_double(total)); assert(0); return -1; } diff --git a/probdist.h b/probdist.h index dc56899..36d4a71 100644 --- a/probdist.h +++ b/probdist.h @@ -7,6 +7,7 @@ * initialized, then random items assigned a value repeatedly and * random items picked repeatedly as well. */ +#include "fixp.h" #include "move.h" #include "util.h" @@ -17,19 +18,16 @@ struct board; struct probdist { struct board *b; - double *items; // [bsize2], [i] = P(pick==i) - double *rowtotals; // [bsize], [i] = sum of items in row i - double total; // sum of all items + fixp_t *items; // [bsize2], [i] = P(pick==i) + fixp_t *rowtotals; // [bsize], [i] = sum of items in row i + fixp_t total; // sum of all items }; -/* Probability so small that it's same as zero; used to compensate - * for probdist.total inaccuracies. */ -#define PROBDIST_EPSILON 0.05 /* Declare pd_ corresponding to board b_ in the local scope. */ #define probdist_alloca(pd_, b_) \ - double pd_ ## __pdi[board_size2(b_)] __attribute__((aligned(32))); memset(pd_ ## __pdi, 0, sizeof(pd_ ## __pdi)); \ - double pd_ ## __pdr[board_size(b_)] __attribute__((aligned(32))); memset(pd_ ## __pdr, 0, sizeof(pd_ ## __pdr)); \ + fixp_t pd_ ## __pdi[board_size2(b_)] __attribute__((aligned(32))); memset(pd_ ## __pdi, 0, sizeof(pd_ ## __pdi)); \ + fixp_t pd_ ## __pdr[board_size(b_)] __attribute__((aligned(32))); memset(pd_ ## __pdr, 0, sizeof(pd_ ## __pdr)); \ struct probdist pd_ = { .b = b_, .items = pd_ ## __pdi, .rowtotals = pd_ ## __pdr, .total = 0 }; /* Get the value of given item. */ @@ -39,7 +37,7 @@ struct probdist { #define probdist_total(pd) ((pd)->total) /* Set the value of given item. */ -static void probdist_set(struct probdist *pd, coord_t c, double val); +static void probdist_set(struct probdist *pd, coord_t c, fixp_t val); /* Remove the item from the totals; this is used when you then * pass it in the ignore list to probdist_pick(). Of course you @@ -57,7 +55,7 @@ coord_t probdist_pick(struct probdist *pd, coord_t *ignore); static inline void -probdist_set(struct probdist *pd, coord_t c, double val) +probdist_set(struct probdist *pd, coord_t c, fixp_t val) { /* We disable the assertions here since this is quite time-critical * part of code, and also the compiler is reluctant to inline the diff --git a/uct/walk.c b/uct/walk.c index dfb1175..83a739f 100644 --- a/uct/walk.c +++ b/uct/walk.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -147,7 +148,7 @@ uct_playout_probdist(void *data, struct board *b, enum stone to_play, struct pro for (; li; li = li->sibling) { if (board_at(b, li->coord) != S_NONE) continue; - probdist_set(pd, li->coord, pd->items[li->coord] * li_gamma(to_play, li)); + probdist_set(pd, li->coord, double_to_fixp(pd->items[li->coord] * li_gamma(to_play, li))); } } -- 2.11.4.GIT