From e50f1db8c3b82d71afcf889597cc87c5859e5e9f Mon Sep 17 00:00:00 2001
From: Petr Baudis <pasky@ucw.cz>
Date: Sat, 3 Jul 2010 03:49:51 +0200
Subject: [PATCH] Probdist: Convert all values to newly introduced fixp_t

We have unending problems with accuracy; the fixed-point type should fit
it all, and perhaps also be faster.
---
 board.c       |  7 +++---
 fixp.h        | 23 +++++++++++++++++++
 playout/elo.c | 71 ++++++++++++++++++++++++++++++++++++-----------------------
 probdist.c    | 17 +++++++-------
 probdist.h    | 18 +++++++--------
 uct/walk.c    |  3 ++-
 6 files changed, 88 insertions(+), 51 deletions(-)
 create mode 100644 fixp.h

diff --git a/board.c b/board.c
index cb95d22..084b0b9 100644
--- a/board.c
+++ b/board.c
@@ -1,5 +1,6 @@
 #include <alloca.h>
 #include <assert.h>
+#include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -291,8 +292,8 @@ board_clear(struct board *board)
 #ifdef BOARD_GAMMA
 	board->prob[0].b = board->prob[1].b = board;
 	foreach_point(board) {
-		probdist_set(&board->prob[0], c, (board_at(board, c) == S_NONE) * 1.0f);
-		probdist_set(&board->prob[1], c, (board_at(board, c) == S_NONE) * 1.0f);
+		probdist_set(&board->prob[0], c, double_to_fixp((board_at(board, c) == S_NONE) * 1.0f));
+		probdist_set(&board->prob[1], c, double_to_fixp((board_at(board, c) == S_NONE) * 1.0f));
 	} foreach_point_end;
 #endif
 }
@@ -428,7 +429,7 @@ board_gamma_update(struct board *board, coord_t coord, enum stone color)
 		value *= board->gamma->gamma[FEAT_AESCAPE][0];
 	if (!trait_at(board, coord, color).safe)
 		value *= board->gamma->gamma[FEAT_SELFATARI][1 + board->precise_selfatari];
-	probdist_set(&board->prob[color - 1], coord, value);
+	probdist_set(&board->prob[color - 1], coord, double_to_fixp(value));
 #endif
 }
 
diff --git a/fixp.h b/fixp.h
new file mode 100644
index 0000000..0e27576
--- /dev/null
+++ b/fixp.h
@@ -0,0 +1,23 @@
+#ifndef ZZGO_FIXP_H
+#define ZZGO_FIXP_H
+
+/* Tools for counting fixed-point numbers. */
+
+/* We implement a simple fixed-point number type, with fixed number of
+ * fractional binary digits after the radix point. */
+
+#include <stdint.h>
+
+typedef uint_fast32_t fixp_t;
+
+/* We should accomodate at least 0..131072 (17bits) in the whole number
+ * portion; assuming at least 32bit integer, that leaves us with 15-bit
+ * fractional part. Thankfully, we need only unsigned values. */
+#define FIXP_BITS 15
+
+#define FIXP_SCALE (1<<FIXP_BITS)
+
+#define double_to_fixp(n) (nearbyint((n) * (FIXP_SCALE)))
+#define fixp_to_double(n) ((double) (n) / FIXP_SCALE)
+
+#endif
diff --git a/playout/elo.c b/playout/elo.c
index a09b593..938c17e 100644
--- a/playout/elo.c
+++ b/playout/elo.c
@@ -25,6 +25,7 @@
 //#define DEBUG
 #include "board.h"
 #include "debug.h"
+#include "fixp.h"
 #include "pattern.h"
 #include "patternsp.h"
 #include "playout.h"
@@ -107,8 +108,8 @@ skip_move:
 			g *= gamma;
 		}
 
-		probdist_set(pd, m.coord, g);
-		//fprintf(stderr, "<%d> %s %f (E %f)\n", f, coord2sstr(m.coord, b), probdist_one(pd, m.coord), pd->items[f]);
+		probdist_set(pd, m.coord, double_to_fixp(g));
+		//fprintf(stderr, "<%d> %s %f (E %f)\n", f, coord2sstr(m.coord, b), fixp_to_double(probdist_one(pd, m.coord)), pd->items[f]);
 	}
 
 	return moves;
@@ -119,12 +120,12 @@ struct lprobdist {
 	int n;
 #define LPD_MAX 8
 	coord_t coords[LPD_MAX];
-	double items[LPD_MAX];
-	double total;
+	fixp_t items[LPD_MAX];
+	fixp_t total;
 	
 	/* Backups of original totals for restoring. */
-	double btotal;
-	double browtotals_v[10];
+	fixp_t btotal;
+	fixp_t browtotals_v[10];
 	int browtotals_i[10];
 	int browtotals_n;
 };
@@ -135,8 +136,9 @@ static void
 elo_check_probdist(struct playout_policy *p, struct board *b, enum stone to_play, struct probdist *pd, int *ignores, struct lprobdist *lpd, coord_t lc)
 {
 #if 0
+#define PROBDIST_EPSILON double_to_fixp(0.01)
 	struct elo_policy *pp = p->data;
-	if (pd->total < PROBDIST_EPSILON)
+	if (pd->total == 0)
 		return;
 
 	/* Compare to the manually created distribution. */
@@ -147,20 +149,30 @@ elo_check_probdist(struct playout_policy *p, struct board *b, enum stone to_play
 	for (int i = 0; i < b->flen; i++) {
 		coord_t c = b->f[i];
 		if (is_pass(c)) continue;
-		// XXX: Hardcoded ignores[] structure
-		if (ignores[0] == c) continue;
-		double val = pd->items[c];
+		if (c == b->ko.coord) continue;
+		fixp_t val = pd->items[c];
 		if (!is_pass(lc) && coord_is_8adjecent(lc, c, b))
 			for (int j = 0; j < lpd->n; j++)
-				if (lpd->coords[j] == c)
+				if (lpd->coords[j] == c) {
 					val = lpd->items[j];
-		if (fabs(pdx.items[c] - val) < PROBDIST_EPSILON)
+					probdist_mute(&pdx, c);
+
+				}
+		if (abs(pdx.items[c] - val) < PROBDIST_EPSILON)
 			continue;
-		printf("[%s %d] manual %f board %f ", coord2sstr(c, b), b->pat3[c], pdx.items[c], pd->items[c]);
+		printf("[%s %d] manual %f board %f (base %f) ", coord2sstr(c, b), b->pat3[c], fixp_to_double(pdx.items[c]), fixp_to_double(val), fixp_to_double(pd->items[c]));
 		board_gamma_update(b, c, to_play);
-		printf("plainboard %f\n", pd->items[c]);
+		printf("plainboard %f\n", fixp_to_double(pd->items[c]));
+		assert(0);
+	}
+	for (int r = 0; r < board_size(b); r++) {
+		if (abs(pdx.rowtotals[r] - pd->rowtotals[r]) < PROBDIST_EPSILON)
+			continue;
+		fprintf(stderr, "row %d: manual %f board %f\n", r, fixp_to_double(pdx.rowtotals[r]), fixp_to_double(pd->rowtotals[r]));
 		assert(0);
 	}
+	assert(abs(pdx.total - pd->total) < PROBDIST_EPSILON);
+#undef PROBDIST_EPSILON
 #endif
 }
 
@@ -180,7 +192,7 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play
 		pp->callback(pp->callback_data, b, to_play, pd);
 
 	if (PLDEBUGL(5)) {
-		fprintf(stderr, "pd total pre %lf lpd %lf\n", pd->total, lpd.total);
+		fprintf(stderr, "pd total pre %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total));
 	}
 
 #define ignore_move(c_) do { \
@@ -197,7 +209,7 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play
 	lpd.browtotals_v[lpd.browtotals_n++] = pd->rowtotals[rowi]; \
 	probdist_mute(pd, c_); \
 	if (PLDEBUGL(6)) \
-		fprintf(stderr, "ignored move %s(%lf) => tot pd %lf lpd %lf\n", coord2sstr(c_, pd->b), pd->items[c_], pd->total, lpd.total); \
+		fprintf(stderr, "ignored move %s(%f) => tot pd %f lpd %f\n", coord2sstr(c_, pd->b), fixp_to_double(pd->items[c_]), fixp_to_double(pd->total), fixp_to_double(lpd.total)); \
 } while (0)
 
 	/* Make sure ko-prohibited move does not get picked. */
@@ -213,7 +225,7 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play
 				continue; // already ignored
 			ignore_move(c);
 
-			double val = probdist_one(pd, c) * b->gamma->gamma[FEAT_CONTIGUITY][1];
+			fixp_t val = double_to_fixp(fixp_to_double(probdist_one(pd, c)) * b->gamma->gamma[FEAT_CONTIGUITY][1]);
 			lpd.coords[lpd.n] = c;
 			lpd.items[lpd.n++] = val;
 			lpd.total += val;
@@ -222,21 +234,21 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play
 
 	ignores[ignores_n] = pass;
 	if (PLDEBUGL(5))
-		fprintf(stderr, "pd total post %lf lpd %lf\n", pd->total, lpd.total);
+		fprintf(stderr, "pd total post %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total));
 
 	/* Verify sanity, possibly. */
 	elo_check_probdist(p, b, to_play, pd, ignores, &lpd, b->last_move.coord);
 
 	/* Pick a move. */
 	coord_t c = pass;
-	double stab = fast_frandom() * (lpd.total + pd->total);
+	fixp_t stab = fast_irandom(lpd.total + pd->total);
 	if (PLDEBUGL(5))
-		fprintf(stderr, "stab %lf / (%lf + %lf)\n", stab, lpd.total, pd->total);
-	if (stab < lpd.total - PROBDIST_EPSILON) {
+		fprintf(stderr, "stab %f / (%f + %f)\n", fixp_to_double(stab), fixp_to_double(lpd.total), fixp_to_double(pd->total));
+	if (stab < lpd.total) {
 		/* Local probdist. */
 		if (PLDEBUGL(6)) {
 			/* Some debug prints. */
-			double tot = 0;
+			fixp_t tot = 0;
 			for (int i = 0; i < lpd.n; i++) {
 				tot += lpd.items[i];
 				struct pattern p;
@@ -247,7 +259,10 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play
 				}
 				pattern_match(&pp->choose.pc, pp->choose.ps, &p, b, &m);
 				char s[256] = ""; pattern2str(s, &p);
-				fprintf(stderr, "coord %s <%lf> [tot %lf] %s (p3:%d)\n", coord2sstr(lpd.coords[i], b), lpd.items[i], tot, s, pattern3_by_spatial(pp->choose.pc.spat_dict, b->pat3[lpd.coords[i]]));
+				fprintf(stderr, "coord %s <%f> [tot %f] %s (p3:%d)\n",
+					coord2sstr(lpd.coords[i], b), fixp_to_double(lpd.items[i]),
+					fixp_to_double(tot), s,
+					pattern3_by_spatial(pp->choose.pc.spat_dict, b->pat3[lpd.coords[i]]));
 			}
 		}
 		for (int i = 0; i < lpd.n; i++) {
@@ -258,11 +273,11 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play
 			stab -= lpd.items[i];
 		}
 		if (is_pass(c)) {
-			fprintf(stderr, "elo: local overstab [%lf]\n", stab);
+			fprintf(stderr, "elo: local overstab [%f]\n", fixp_to_double(stab));
 			abort();
 		}
 
-	} else if (pd->total >= PROBDIST_EPSILON) {
+	} else if (pd->total > 0) {
 		/* Global probdist. */
 		/* XXX: We re-stab inside. */
 		c = probdist_pick(pd, ignores);
@@ -284,7 +299,7 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play
 			pd->items[b->f[i]] = 0;
 			board_gamma_update(b, b->f[i], to_play);
 		}
-		assert(fabs(pd->total - lpd.btotal) < PROBDIST_EPSILON);
+		assert(pd->total == lpd.btotal);
 
 	} else {
 		pd->total = lpd.btotal;
@@ -307,7 +322,7 @@ playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play
 	elo_get_probdist(p, &pp->choose, b, to_play, &pd);
 	if (pp->callback)
 		pp->callback(pp->callback_data, b, to_play, &pd);
-	if (pd.total < PROBDIST_EPSILON)
+	if (pd.total == 0)
 		return pass;
 	int ignores[1] = { pass };
 	coord_t c = probdist_pick(&pd, ignores);
@@ -333,7 +348,7 @@ playout_elo_assess(struct playout_policy *p, struct prior_map *map, int games)
 		coord_t c = map->b->f[f];
 		if (!map->consider[c])
 			continue;
-		add_prior_value(map, c, probdist_one(&pd, c) / probdist_total(&pd), games);
+		add_prior_value(map, c, fixp_to_double(probdist_one(&pd, c)) / fixp_to_double(probdist_total(&pd)), games);
 	}
 }
 
diff --git a/probdist.c b/probdist.c
index ba1cf9e..a611045 100644
--- a/probdist.c
+++ b/probdist.c
@@ -13,17 +13,16 @@
 coord_t
 probdist_pick(struct probdist *restrict pd, coord_t *restrict ignore)
 {
-	double total = probdist_total(pd) - PROBDIST_EPSILON;
-	assert(total >= 0);
-	double stab = fast_frandom() * total;
+	fixp_t total = probdist_total(pd);
+	fixp_t stab = fast_irandom(total);
 	if (DEBUGL(6))
-		fprintf(stderr, "stab %f / %f\n", stab, total);
+		fprintf(stderr, "stab %f / %f\n", fixp_to_double(stab), fixp_to_double(total));
 
 	int r = 0;
 	coord_t c = 0;
-	while (stab > pd->rowtotals[r] + PROBDIST_EPSILON) {
+	while (stab > pd->rowtotals[r]) {
 		if (DEBUGL(6))
-			fprintf(stderr, "[%s] skipping row %f (%f)\n", coord2sstr(c, pd->b), pd->rowtotals[r], stab);
+			fprintf(stderr, "[%s] skipping row %f (%f)\n", coord2sstr(c, pd->b), fixp_to_double(pd->rowtotals[r]), fixp_to_double(stab));
 
 		stab -= pd->rowtotals[r];
 		r++; assert(r < board_size(pd->b));
@@ -35,12 +34,12 @@ probdist_pick(struct probdist *restrict pd, coord_t *restrict ignore)
 
 	for (; c < board_size2(pd->b); c++) {
 		if (DEBUGL(6))
-			fprintf(stderr, "[%s] %f (%f)\n", coord2sstr(c, pd->b), pd->items[c], stab);
+			fprintf(stderr, "[%s] %f (%f)\n", coord2sstr(c, pd->b), fixp_to_double(pd->items[c]), fixp_to_double(stab));
 
 		assert(is_pass(*ignore) || c <= *ignore);
 		if (c == *ignore) {
 			if (DEBUGL(6))
-				fprintf(stderr, "ignored\n");
+				fprintf(stderr, "\tignored\n");
 			ignore++;
 			continue;
 		}
@@ -50,7 +49,7 @@ probdist_pick(struct probdist *restrict pd, coord_t *restrict ignore)
 		stab -= pd->items[c];
 	}
 
-	fprintf(stderr, "overstab %f (total %f)\n", stab, total);
+	fprintf(stderr, "overstab %f (total %f)\n", fixp_to_double(stab), fixp_to_double(total));
 	assert(0);
 	return -1;
 }
diff --git a/probdist.h b/probdist.h
index dc56899..36d4a71 100644
--- a/probdist.h
+++ b/probdist.h
@@ -7,6 +7,7 @@
  * initialized, then random items assigned a value repeatedly and
  * random items picked repeatedly as well. */
 
+#include "fixp.h"
 #include "move.h"
 #include "util.h"
 
@@ -17,19 +18,16 @@ struct board;
 
 struct probdist {
 	struct board *b;
-	double *items; // [bsize2], [i] = P(pick==i)
-	double *rowtotals; // [bsize], [i] = sum of items in row i
-	double total; // sum of all items
+	fixp_t *items; // [bsize2], [i] = P(pick==i)
+	fixp_t *rowtotals; // [bsize], [i] = sum of items in row i
+	fixp_t total; // sum of all items
 };
-/* Probability so small that it's same as zero; used to compensate
- * for probdist.total inaccuracies. */
-#define PROBDIST_EPSILON 0.05
 
 
 /* Declare pd_ corresponding to board b_ in the local scope. */
 #define probdist_alloca(pd_, b_) \
-	double pd_ ## __pdi[board_size2(b_)] __attribute__((aligned(32))); memset(pd_ ## __pdi, 0, sizeof(pd_ ## __pdi)); \
-	double pd_ ## __pdr[board_size(b_)] __attribute__((aligned(32))); memset(pd_ ## __pdr, 0, sizeof(pd_ ## __pdr)); \
+	fixp_t pd_ ## __pdi[board_size2(b_)] __attribute__((aligned(32))); memset(pd_ ## __pdi, 0, sizeof(pd_ ## __pdi)); \
+	fixp_t pd_ ## __pdr[board_size(b_)] __attribute__((aligned(32))); memset(pd_ ## __pdr, 0, sizeof(pd_ ## __pdr)); \
 	struct probdist pd_ = { .b = b_, .items = pd_ ## __pdi, .rowtotals = pd_ ## __pdr, .total = 0 };
 
 /* Get the value of given item. */
@@ -39,7 +37,7 @@ struct probdist {
 #define probdist_total(pd) ((pd)->total)
 
 /* Set the value of given item. */
-static void probdist_set(struct probdist *pd, coord_t c, double val);
+static void probdist_set(struct probdist *pd, coord_t c, fixp_t val);
 
 /* Remove the item from the totals; this is used when you then
  * pass it in the ignore list to probdist_pick(). Of course you
@@ -57,7 +55,7 @@ coord_t probdist_pick(struct probdist *pd, coord_t *ignore);
 
 
 static inline void
-probdist_set(struct probdist *pd, coord_t c, double val)
+probdist_set(struct probdist *pd, coord_t c, fixp_t val)
 {
 	/* We disable the assertions here since this is quite time-critical
 	 * part of code, and also the compiler is reluctant to inline the
diff --git a/uct/walk.c b/uct/walk.c
index dfb1175..83a739f 100644
--- a/uct/walk.c
+++ b/uct/walk.c
@@ -1,4 +1,5 @@
 #include <assert.h>
+#include <math.h>
 #include <pthread.h>
 #include <signal.h>
 #include <stdio.h>
@@ -147,7 +148,7 @@ uct_playout_probdist(void *data, struct board *b, enum stone to_play, struct pro
 	for (; li; li = li->sibling) {
 		if (board_at(b, li->coord) != S_NONE)
 			continue;
-		probdist_set(pd, li->coord, pd->items[li->coord] * li_gamma(to_play, li));
+		probdist_set(pd, li->coord, double_to_fixp(pd->items[li->coord] * li_gamma(to_play, li)));
 	}
 }
 
-- 
2.11.4.GIT