From cd7364d735092c237c647499ad2f4c7eae0e6474 Mon Sep 17 00:00:00 2001 From: Jean-loup Gailly Date: Tue, 21 Dec 2010 10:16:41 +0100 Subject: [PATCH] Allow double for all floating point values in large configurations. --- Makefile | 4 +++ board.c | 6 ++--- board.h | 6 ++--- gtp.c | 4 +-- montecarlo/internal.h | 2 +- montecarlo/montecarlo.c | 6 ++--- ownermap.c | 2 +- ownermap.h | 4 +-- playout.c | 2 +- playout/elo.c | 2 +- random.c | 12 ++++----- random.h | 4 ++- stats.h | 28 ++++++++++----------- tactics/util.c | 4 +-- tactics/util.h | 2 +- timeinfo.h | 2 +- uct/dynkomi.c | 66 ++++++++++++++++++++++++------------------------- uct/dynkomi.h | 4 +-- uct/internal.h | 10 ++++---- uct/plugin/wolf.c | 14 +++++------ uct/policy/generic.h | 4 +-- uct/policy/ucb1.c | 8 +++--- uct/policy/ucb1amaf.c | 26 +++++++++---------- uct/prior.h | 6 ++--- uct/search.c | 4 +-- uct/tree.c | 4 +-- uct/tree.h | 6 ++--- uct/uct.c | 4 +-- uct/uct.h | 2 +- uct/walk.c | 14 +++++------ util.h | 6 +++++ 31 files changed, 140 insertions(+), 128 deletions(-) diff --git a/Makefile b/Makefile index 924da9d..8140a14 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,10 @@ else LDFLAGS=-lm -pthread -lrt -ldl -rdynamic endif +ifdef DOUBLE + CUSTOM_CFLAGS+=-Dfloating_t=double +endif + # Profiling: ifdef PROFILING LDFLAGS+=-pg diff --git a/board.c b/board.c index 5bc33c6..09becfc 100644 --- a/board.c +++ b/board.c @@ -196,7 +196,7 @@ void board_clear(struct board *board) { int size = board_size(board); - float komi = board->komi; + floating_t komi = board->komi; board_done_noalloc(board); board_setup(board); @@ -1508,7 +1508,7 @@ board_get_one_point_eye(struct board *board, coord_t coord) } -float +floating_t board_fast_score(struct board *board) { int scores[S_MAX]; @@ -1573,7 +1573,7 @@ board_tromp_taylor_iter(struct board *board, int *ownermap) } /* Tromp-Taylor Counting */ -float +floating_t board_official_score(struct board *board, struct move_queue *q) { diff --git a/board.h b/board.h index 
11dc23f..179c7a4 100644 --- a/board.h +++ b/board.h @@ -124,7 +124,7 @@ struct board { int size2; /* size^2 */ int bits2; /* ceiling(log2(size2)) */ int captures[S_MAX]; - float komi; + floating_t komi; int handicap; /* The ruleset is currently almost never taken into account; * the board implementation is basically Chinese rules (handicap @@ -369,10 +369,10 @@ enum stone board_get_one_point_eye(struct board *board, coord_t c); * (e.g. playouts), use board_fast_score(). */ /* Positive: W wins */ /* Compare number of stones + 1pt eyes. */ -float board_fast_score(struct board *board); +floating_t board_fast_score(struct board *board); /* Tromp-Taylor scoring, assuming given groups are actually dead. */ struct move_queue; -float board_official_score(struct board *board, struct move_queue *mq); +floating_t board_official_score(struct board *board, struct move_queue *mq); /** Iterators */ diff --git a/gtp.c b/gtp.c index 80ef1c6..f7fc24f 100644 --- a/gtp.c +++ b/gtp.c @@ -379,7 +379,7 @@ gtp_parse(struct board *board, struct engine *engine, struct time_info *ti, char struct move_queue q = { .moves = 0 }; if (engine->dead_group_list) engine->dead_group_list(engine, board, &q); - float score = board_official_score(board, &q); + floating_t score = board_official_score(board, &q); char str[64]; if (DEBUGL(1)) fprintf(stderr, "counted score %.1f\n", score); @@ -468,7 +468,7 @@ next_group:; * and call uct_evaluate() for each. uct_evaluate() * will throw NAN in case of invalid moves and such. 
*/ for (int i = 0; i < board->flen; i++) { - float val = uct_evaluate(engine, board, &ti[color], board->f[i], color); + floating_t val = uct_evaluate(engine, board, &ti[color], board->f[i], color); if (isnan(val)) continue; printf("%s %1.3f\n", coord2sstr(board->f[i], board), (double) val); diff --git a/montecarlo/internal.h b/montecarlo/internal.h index 985e494..4222041 100644 --- a/montecarlo/internal.h +++ b/montecarlo/internal.h @@ -13,7 +13,7 @@ struct playout_policy; struct montecarlo { int debug_level; int gamelen; - float resign_ratio; + floating_t resign_ratio; int loss_threshold; struct playout_policy *playout; }; diff --git a/montecarlo/montecarlo.c b/montecarlo/montecarlo.c index 53b3249..50df996 100644 --- a/montecarlo/montecarlo.c +++ b/montecarlo/montecarlo.c @@ -60,7 +60,7 @@ board_stats_print(struct board *board, struct move_stat *moves, FILE *f) fprintf(f, "%2d | ", y); for (x = 1; x < board_size(board) - 1; x++) if (moves[y * board_size(board) + x].games) - fprintf(f, "%0.2f ", (float) moves[y * board_size(board) + x].wins / moves[y * board_size(board) + x].games); + fprintf(f, "%0.2f ", (floating_t) moves[y * board_size(board) + x].wins / moves[y * board_size(board) + x].games); else fprintf(f, "---- "); fprintf(f, "| "); @@ -94,7 +94,7 @@ montecarlo_genmove(struct engine *e, struct board *b, struct time_info *ti, enum /* resign when the hope for win vanishes */ coord_t top_coord = resign; - float top_ratio = mc->resign_ratio; + floating_t top_ratio = mc->resign_ratio; /* We use [0] for pass. Normally, this is an inaccessible corner * of board margin. */ @@ -185,7 +185,7 @@ pass_wins: continue; } - float ratio = (float) moves[c].wins / moves[c].games; + floating_t ratio = (floating_t) moves[c].wins / moves[c].games; /* Since pass is [0,0], we will pass only when we have nothing * better to do. 
*/ if (ratio >= top_ratio) { diff --git a/ownermap.c b/ownermap.c index c4218a0..702f47b 100644 --- a/ownermap.c +++ b/ownermap.c @@ -31,7 +31,7 @@ board_ownermap_merge(int bsize2, struct board_ownermap *dst, struct board_ownerm } enum point_judgement -board_ownermap_judge_point(struct board_ownermap *ownermap, coord_t c, float thres) +board_ownermap_judge_point(struct board_ownermap *ownermap, coord_t c, floating_t thres) { assert(ownermap->map); int n = ownermap->map[c][S_NONE]; diff --git a/ownermap.h b/ownermap.h index 337fbe0..b99fdff 100644 --- a/ownermap.h +++ b/ownermap.h @@ -28,12 +28,12 @@ enum point_judgement { PJ_WHITE = S_WHITE, PJ_UNKNOWN = 3, }; -enum point_judgement board_ownermap_judge_point(struct board_ownermap *ownermap, coord_t c, float thres); +enum point_judgement board_ownermap_judge_point(struct board_ownermap *ownermap, coord_t c, floating_t thres); /* Estimate status of stones on board based on ownermap stats. */ struct group_judgement { - float thres; + floating_t thres; enum gj_state { GS_NONE, GS_DEAD, diff --git a/playout.c b/playout.c index 85dd386..2c92af2 100644 --- a/playout.c +++ b/playout.c @@ -144,7 +144,7 @@ play_random: color = stone_other(color); } - float score = board_fast_score(b); + floating_t score = board_fast_score(b); int result = (starting_color == S_WHITE ? 
score * 2 : - (score * 2)); if (DEBUGL(6)) { diff --git a/playout/elo.c b/playout/elo.c index fba5896..6262426 100644 --- a/playout/elo.c +++ b/playout/elo.c @@ -49,7 +49,7 @@ struct patternset { struct elo_policy { bool assess_fastpat; - float selfatari; + floating_t selfatari; struct patternset choose, assess; playout_elo_callbackp callback; void *callback_data; diff --git a/random.c b/random.c index 16b424d..20457ea 100644 --- a/random.c +++ b/random.c @@ -33,12 +33,12 @@ fast_random(unsigned int max) return ((pmseed & 0xffff) * max) >> 16; } -float +floating_t fast_frandom(void) { - /* Construct (1,2) IEEE float from our random integer */ + /* Construct (1,2) IEEE floating_t from our random integer */ /* http://rgba.org/articles/sfrand/sfrand.htm */ - union { unsigned long ul; float f; } p; + union { unsigned long ul; floating_t f; } p; p.ul = (((pmseed *= 16807) & 0x007fffff) - 1) | 0x3f800000; return p.f - 1.0f; } @@ -85,14 +85,14 @@ fast_random(unsigned int max) return ((pmseed & 0xffff) * max) >> 16; } -float +floating_t fast_frandom(void) { - /* Construct (1,2) IEEE float from our random integer */ + /* Construct (1,2) IEEE floating_t from our random integer */ /* http://rgba.org/articles/sfrand/sfrand.htm */ unsigned long pmseed = (unsigned long)pthread_getspecific(seed_key); pmseed *= 16807; - union { unsigned long ul; float f; } p; + union { unsigned long ul; floating_t f; } p; p.ul = ((pmseed & 0x007fffff) - 1) | 0x3f800000; pthread_setspecific(seed_key, (void *)pmseed); return p.f - 1.0f; diff --git a/random.h b/random.h index fe351c4..0722a35 100644 --- a/random.h +++ b/random.h @@ -3,6 +3,8 @@ #include +#include "util.h" + void fast_srandom(unsigned long seed); unsigned long fast_getseed(void); @@ -12,7 +14,7 @@ uint16_t fast_random(unsigned int max); static uint32_t fast_irandom(unsigned int max); /* Get random number in [0..1] range. 
*/ -float fast_frandom(); +floating_t fast_frandom(); static inline uint32_t diff --git a/stats.h b/stats.h index 7dcae03..7a8e1a0 100644 --- a/stats.h +++ b/stats.h @@ -11,14 +11,14 @@ struct move_stats { int playouts; // # of playouts - float value; // BLACK wins/playouts + floating_t value; // BLACK wins/playouts }; /* Add a result to the stats. */ -static void stats_add_result(struct move_stats *s, float result, int playouts); +static void stats_add_result(struct move_stats *s, floating_t result, int playouts); /* Remove a result from the stats. */ -static void stats_rm_result(struct move_stats *s, float result, int playouts); +static void stats_rm_result(struct move_stats *s, floating_t result, int playouts); /* Merge two stats together. THIS IS NOT ATOMIC! */ static void stats_merge(struct move_stats *dest, struct move_stats *src); @@ -28,11 +28,11 @@ static void stats_reverse_parity(struct move_stats *s); /* Temper value based on parent value in specified way - the value should be * usable standalone then, representing an improvement against parent value. */ -static float stats_temper_value(float val, float pval, int mode); +static floating_t stats_temper_value(floating_t val, floating_t pval, int mode); /* We actually do the atomicity in a pretty hackish way - we simply - * rely on the fact that int,float operations should be atomic with + * rely on the fact that int,floating_t operations should be atomic with * reasonable compilers (gcc) on reasonable architectures (i386, * x86_64). */ /* There is a write order dependency - when we bump the playouts, @@ -41,10 +41,10 @@ static float stats_temper_value(float val, float pval, int mode); * current s->playouts is zero. 
*/ static inline void -stats_add_result(struct move_stats *s, float result, int playouts) +stats_add_result(struct move_stats *s, floating_t result, int playouts) { int s_playouts = s->playouts; - float s_value = s->value; + floating_t s_value = s->value; /* Force the load, another thread can work on the * values in parallel. */ __sync_synchronize(); /* full memory barrier */ @@ -59,11 +59,11 @@ stats_add_result(struct move_stats *s, float result, int playouts) } static inline void -stats_rm_result(struct move_stats *s, float result, int playouts) +stats_rm_result(struct move_stats *s, floating_t result, int playouts) { if (s->playouts > playouts) { int s_playouts = s->playouts; - float s_value = s->value; + floating_t s_value = s->value; /* Force the load, another thread can work on the * values in parallel. */ __sync_synchronize(); /* full memory barrier */ @@ -102,11 +102,11 @@ stats_reverse_parity(struct move_stats *s) s->value = 1 - s->value; } -static inline float -stats_temper_value(float val, float pval, int mode) +static inline floating_t +stats_temper_value(floating_t val, floating_t pval, int mode) { - float tval = val; - float expd = val - pval; + floating_t tval = val; + floating_t expd = val - pval; switch (mode) { case 1: /* no tempering */ tval = val; @@ -115,7 +115,7 @@ stats_temper_value(float val, float pval, int mode) tval = 0.5 + expd / 2; break; case 3: { /* 0.5+bzz((result-expected)^2) */ - float ntval = expd * expd; + floating_t ntval = expd * expd; /* val = 1 pval = 0.8 : ntval = 0.04 tval = 0.54 * val = 1 pval = 0.6 : ntval = 0.16 tval = 0.66 * val = 1 pval = 0.3 : ntval = 0.49 tval = 0.99 diff --git a/tactics/util.c b/tactics/util.c index 3711a94..610ba6f 100644 --- a/tactics/util.c +++ b/tactics/util.c @@ -84,7 +84,7 @@ cfg_distances(struct board *b, coord_t start, int *distances, int maxdist) } -float +floating_t board_effective_handicap(struct board *b, int first_move_value) { /* This can happen if the opponent passes during handicap 
@@ -97,7 +97,7 @@ board_effective_handicap(struct board *b, int first_move_value) bool pass_is_safe(struct board *b, enum stone color, struct move_queue *mq) { - float score = board_official_score(b, mq); + floating_t score = board_official_score(b, mq); if (color == S_BLACK) score = -score; //fprintf(stderr, "%d score %f\n", color, score); diff --git a/tactics/util.h b/tactics/util.h index bfe5c30..47e42cf 100644 --- a/tactics/util.h +++ b/tactics/util.h @@ -31,7 +31,7 @@ void cfg_distances(struct board *b, coord_t start, int *distances, int maxdist); * handicap stone, 7 is a good default. */ /* This is just an approximation since in reality, handicap seems to be usually * non-linear. */ -float board_effective_handicap(struct board *b, int first_move_value); +floating_t board_effective_handicap(struct board *b, int first_move_value); /* Decide if the given player wins counting on the board, considering * that given groups are dead. (To get the list of dead groups, use diff --git a/timeinfo.h b/timeinfo.h index 9771eab..c8f7bc5 100644 --- a/timeinfo.h +++ b/timeinfo.h @@ -63,7 +63,7 @@ struct time_info { /* Parse time information provided in custom format: * =NUM - fixed number of simulations per move - * NUM - number of seconds to spend per move (can be float) + * NUM - number of seconds to spend per move (can be floating-point) * _NUM - number of seconds to spend per game * * Returns false on parse error. 
*/ diff --git a/uct/dynkomi.c b/uct/dynkomi.c index 1e1a9d5..6c810b1 100644 --- a/uct/dynkomi.c +++ b/uct/dynkomi.c @@ -53,19 +53,19 @@ struct dynkomi_linear { bool rootbased; }; -static float +static floating_t linear_permove(struct uct_dynkomi *d, struct board *b, struct tree *tree) { struct dynkomi_linear *l = d->data; if (b->moves >= l->moves) return 0; - float base_komi = board_effective_handicap(b, l->handicap_value); - float extra_komi = base_komi * (l->moves - b->moves) / l->moves; + floating_t base_komi = board_effective_handicap(b, l->handicap_value); + floating_t extra_komi = base_komi * (l->moves - b->moves) / l->moves; return extra_komi; } -static float +static floating_t linear_persim(struct uct_dynkomi *d, struct board *b, struct tree *tree, struct tree_node *node) { struct dynkomi_linear *l = d->data; @@ -151,49 +151,49 @@ struct dynkomi_adaptive { * by linear dynkomi.) */ int lead_moves; /* Maximum komi to pretend the opponent to give. */ - float max_losing_komi; + floating_t max_losing_komi; /* Game portion at which losing komi is not allowed anymore. */ - float losing_komi_stop; + floating_t losing_komi_stop; /* Alternative game portion determination. */ bool adapt_aport; - float (*indicator)(struct uct_dynkomi *d, struct board *b, struct tree *tree, enum stone color); + floating_t (*indicator)(struct uct_dynkomi *d, struct board *b, struct tree *tree, enum stone color); /* Value-based adaptation. */ - float zone_red, zone_green; + floating_t zone_red, zone_green; int score_step; - float score_step_byavg; // use portion of average score as increment + floating_t score_step_byavg; // use portion of average score as increment bool use_komi_ratchet; bool losing_komi_ratchet; // ratchet even losing komi int komi_ratchet_maxage; // runtime, not configuration: int komi_ratchet_age; - float komi_ratchet; + floating_t komi_ratchet; /* Score-based adaptation. 
*/ - float (*adapter)(struct uct_dynkomi *d, struct board *b); - float adapt_base; // [0,1) + floating_t (*adapter)(struct uct_dynkomi *d, struct board *b); + floating_t adapt_base; // [0,1) /* Sigmoid adaptation rate parameter; see below for details. */ - float adapt_phase; // [0,1] - float adapt_rate; // [1,infty) + floating_t adapt_phase; // [0,1] + floating_t adapt_rate; // [1,infty) /* Linear adaptation rate parameter. */ int adapt_moves; - float adapt_dir; // [-1,1] + floating_t adapt_dir; // [-1,1] }; #define TRUSTWORTHY_KOMI_PLAYOUTS 200 -static float +static floating_t board_game_portion(struct dynkomi_adaptive *a, struct board *b) { if (!a->adapt_aport) { int total_moves = b->moves + 2 * board_estimated_moves_left(b); - return (float) b->moves / total_moves; + return (floating_t) b->moves / total_moves; } else { int brsize = board_size(b) - 2; - return 1.0 - (float) b->flen / (brsize * brsize); + return 1.0 - (floating_t) b->flen / (brsize * brsize); } } -static float +static floating_t adapter_sigmoid(struct uct_dynkomi *d, struct board *b) { struct dynkomi_adaptive *a = d->data; @@ -202,12 +202,12 @@ adapter_sigmoid(struct uct_dynkomi *d, struct board *b) * at game stage a->adapt_phase crosses though 0.5 and * approaches 1 at the game end; the slope is controlled * by a->adapt_rate. 
*/ - float game_portion = board_game_portion(a, b); - float l = game_portion - a->adapt_phase; + floating_t game_portion = board_game_portion(a, b); + floating_t l = game_portion - a->adapt_phase; return 1.0 / (1.0 + exp(-a->adapt_rate * l)); } -static float +static floating_t adapter_linear(struct uct_dynkomi *d, struct board *b) { struct dynkomi_adaptive *a = d->data; @@ -222,7 +222,7 @@ adapter_linear(struct uct_dynkomi *d, struct board *b) return a->adapt_dir * b->moves / a->adapt_moves; } -static float +static floating_t komi_by_score(struct uct_dynkomi *d, struct board *b, struct tree *tree, enum stone color) { struct dynkomi_adaptive *a = d->data; @@ -234,16 +234,16 @@ komi_by_score(struct uct_dynkomi *d, struct board *b, struct tree *tree, enum st d->score.playouts = 1; /* Look at average score and push extra_komi in that direction. */ - float p = a->adapter(d, b); + floating_t p = a->adapter(d, b); p = a->adapt_base + p * (1 - a->adapt_base); if (p > 0.9) p = 0.9; // don't get too eager! - float extra_komi = tree->extra_komi + p * score.value; + floating_t extra_komi = tree->extra_komi + p * score.value; if (DEBUGL(3)) fprintf(stderr, "mC += %f * %f\n", p, score.value); return extra_komi; } -static float +static floating_t komi_by_value(struct uct_dynkomi *d, struct board *b, struct tree *tree, enum stone color) { struct dynkomi_adaptive *a = d->data; @@ -280,7 +280,7 @@ komi_by_value(struct uct_dynkomi *d, struct board *b, struct tree *tree, enum st /* Positive extra_komi means that we are _giving_ * komi (winning), negative extra_komi is _taking_ * komi (losing). 
*/ - float extra_komi = komi_by_color(tree->extra_komi, color); + floating_t extra_komi = komi_by_color(tree->extra_komi, color); int score_step_red = -a->score_step; int score_step_green = a->score_step; @@ -338,9 +338,9 @@ komi_by_value(struct uct_dynkomi *d, struct board *b, struct tree *tree, enum st } } -static float +static floating_t bounded_komi(struct dynkomi_adaptive *a, struct board *b, - enum stone color, float komi, float max_losing_komi) + enum stone color, floating_t komi, floating_t max_losing_komi) { /* At the end of game, disallow losing komi. */ if (komi_by_color(komi, color) < 0 @@ -349,7 +349,7 @@ bounded_komi(struct dynkomi_adaptive *a, struct board *b, /* Get lower bound on komi we take so that we don't underperform * too much. */ - float min_komi = komi_by_color(- max_losing_komi, color); + floating_t min_komi = komi_by_color(- max_losing_komi, color); if (komi_by_color(komi - min_komi, color) > 0) return komi; @@ -357,7 +357,7 @@ bounded_komi(struct dynkomi_adaptive *a, struct board *b, return min_komi; } -static float +static floating_t adaptive_permove(struct uct_dynkomi *d, struct board *b, struct tree *tree) { struct dynkomi_adaptive *a = d->data; @@ -372,13 +372,13 @@ adaptive_permove(struct uct_dynkomi *d, struct board *b, struct tree *tree) board_effective_handicap(b, 7 /* XXX */), a->max_losing_komi); - float komi = a->indicator(d, b, tree, color); + floating_t komi = a->indicator(d, b, tree, color); if (DEBUGL(3)) fprintf(stderr, "dynkomi: %f -> %f\n", tree->extra_komi, komi); return bounded_komi(a, b, color, komi, a->max_losing_komi); } -static float +static floating_t adaptive_persim(struct uct_dynkomi *d, struct board *b, struct tree *tree, struct tree_node *node) { return tree->extra_komi; diff --git a/uct/dynkomi.h b/uct/dynkomi.h index 5b54b90..9a0e0ba 100644 --- a/uct/dynkomi.h +++ b/uct/dynkomi.h @@ -31,12 +31,12 @@ struct uct_dynkomi; /* Determine base dynamic komi for this genmove run. 
The returned * value is stored in tree->extra_komi and by itself used just for * user information. */ -typedef float (*uctd_permove)(struct uct_dynkomi *d, struct board *b, struct tree *tree); +typedef floating_t (*uctd_permove)(struct uct_dynkomi *d, struct board *b, struct tree *tree); /* Determine actual dynamic komi for this simulation (run on board @b * from node @node). In some cases, this function will just return * tree->extra_komi, in other cases it might want to adjust the komi * according to the actual move depth. */ -typedef float (*uctd_persim)(struct uct_dynkomi *d, struct board *b, struct tree *tree, struct tree_node *node); +typedef floating_t (*uctd_persim)(struct uct_dynkomi *d, struct board *b, struct tree *tree, struct tree_node *node); /* Destroy the uct_dynkomi structure. */ typedef void (*uctd_done)(struct uct_dynkomi *d); diff --git a/uct/internal.h b/uct/internal.h index 3f3eee7..4c284a1 100644 --- a/uct/internal.h +++ b/uct/internal.h @@ -27,7 +27,7 @@ struct joseki_dict; struct uct { int debug_level; int games, gamelen; - float resign_threshold, sure_win_threshold; + floating_t resign_threshold, sure_win_threshold; double best2_ratio, bestr_ratio; bool pass_all_alive; bool territory_scoring; @@ -63,7 +63,7 @@ struct uct { int dynkomi_interval; struct uct_dynkomi *dynkomi; - float val_scale; + floating_t val_scale; int val_points; bool val_extra; @@ -81,7 +81,7 @@ struct uct { int random_policy_chance; int local_tree; int tenuki_d; - float local_tree_aging; + floating_t local_tree_aging; bool local_tree_allseq; /* Playout-localtree integration. 
*/ bool local_tree_playout; // can be true only if ELO playout @@ -133,11 +133,11 @@ struct uct_descent { typedef struct tree_node *(*uctp_choose)(struct uct_policy *p, struct tree_node *node, struct board *b, enum stone color, coord_t exclude); -typedef float (*uctp_evaluate)(struct uct_policy *p, struct tree *tree, struct uct_descent *descent, int parity); +typedef floating_t (*uctp_evaluate)(struct uct_policy *p, struct tree *tree, struct uct_descent *descent, int parity); typedef void (*uctp_descend)(struct uct_policy *p, struct tree *tree, struct uct_descent *descent, int parity, bool allow_pass); typedef void (*uctp_winner)(struct uct_policy *p, struct tree *tree, struct uct_descent *descent); typedef void (*uctp_prior)(struct uct_policy *p, struct tree *tree, struct tree_node *node, struct board *b, enum stone color, int parity); -typedef void (*uctp_update)(struct uct_policy *p, struct tree *tree, struct tree_node *node, enum stone node_color, enum stone player_color, struct playout_amafmap *amaf, float result); +typedef void (*uctp_update)(struct uct_policy *p, struct tree *tree, struct tree_node *node, enum stone node_color, enum stone player_color, struct playout_amafmap *amaf, floating_t result); struct uct_policy { struct uct *uct; diff --git a/uct/plugin/wolf.c b/uct/plugin/wolf.c index 56b987f..a94843f 100644 --- a/uct/plugin/wolf.c +++ b/uct/plugin/wolf.c @@ -194,7 +194,7 @@ update mode. /* The API types: */ #define MAXBOARDSIZE 19 typedef char byte_board[MAXBOARDSIZE+2][MAXBOARDSIZE+2]; // The array indices are 1-based! -typedef float influ_board[MAXBOARDSIZE][MAXBOARDSIZE]; +typedef floating_t influ_board[MAXBOARDSIZE][MAXBOARDSIZE]; /* Our context structure. 
*/ struct context { @@ -203,7 +203,7 @@ struct context { void *dlh; void (*SETPARAM)(double sv, double omega, uint16_t mi); void (*EVALFUN1)(char *javp, char *InfluField); - void (*FINDMOVE2)(int fa, char *mi, char *mj, float *mxscore, influ_board *SB, byte_board **PChainNo); + void (*FINDMOVE2)(int fa, char *mi, char *mj, floating_t *mxscore, influ_board *SB, byte_board **PChainNo); }; @@ -268,7 +268,7 @@ pachi_plugin_prior(void *data, struct tree_node *node, struct prior_map *map, in /* Retrieve values of moves. */ char bestx, besty; - float bestval; + floating_t bestval; influ_board values; byte_board *chaininfo; ctx->FINDMOVE2(map->to_play == S_BLACK ? 1 : -1, &bestx, &besty, &bestval, &values, &chaininfo); @@ -277,11 +277,11 @@ pachi_plugin_prior(void *data, struct tree_node *node, struct prior_map *map, in /* In the first pass, determine best and worst value. (Best value * reported by FINDMOVE2 is wrong.) In the second pass, we set the * priors by normalization based on the determined values. */ - float best = -1000, worst = 1000; + floating_t best = -1000, worst = 1000; foreach_free_point(map->b) { if (!map->consider[c]) continue; - float value = values[coord_x(c, b) - 1][coord_y(c, b) - 1]; + floating_t value = values[coord_x(c, b) - 1][coord_y(c, b) - 1]; if (map->to_play == S_WHITE) value = -value; if (value > best) best = value; else if (value < worst) worst = value; @@ -294,7 +294,7 @@ pachi_plugin_prior(void *data, struct tree_node *node, struct prior_map *map, in /* Take the value and normalize it somehow. */ /* Right now, we just do this by linear rescaling from * [worst, best] to [0,1]. 
*/ - float value = values[coord_x(c, b) - 1][coord_y(c, b) - 1]; + floating_t value = values[coord_x(c, b) - 1][coord_y(c, b) - 1]; if (map->to_play == S_WHITE) value = -value; value = (value - worst) / (best - worst); // fprintf(stderr, "\t[%s %s] %f/%f\n", stone2str(map->to_play), coord2sstr(c, b), value, best); @@ -311,7 +311,7 @@ pachi_plugin_init(char *arg, struct board *b, int seed) /* Initialize ctx defaults here. */ char *file = NULL; - float overrelax = 1.0, threshold = 0.001; + floating_t overrelax = 1.0, threshold = 0.001; int iterations = 13; ctx->eqex = -1; diff --git a/uct/policy/generic.h b/uct/policy/generic.h index 544f90a..6c071dc 100644 --- a/uct/policy/generic.h +++ b/uct/policy/generic.h @@ -20,12 +20,12 @@ void uctp_generic_winner(struct uct_policy *p, struct tree *tree, struct uct_des /* Information abound best children. */ \ /* XXX: We assume board <=25x25. */ \ struct uct_descent dbest[BOARD_MAX_MOVES + 1] = { { .node = descent->node->children, .lnode = NULL } }; int dbests = 1; \ - float best_urgency = -9999; \ + floating_t best_urgency = -9999; \ /* Descent children iterator. */ \ struct uct_descent dci = { .node = descent->node->children, .lnode = descent->lnode ? descent->lnode->children : NULL }; \ \ for (; dci.node; dci.node = dci.node->sibling) { \ - float urgency; \ + floating_t urgency; \ /* Do not consider passing early. */ \ if (unlikely((!allow_pass && is_pass(dci.node->coord)) || (dci.node->hints & TREE_HINT_INVALID))) \ continue; \ diff --git a/uct/policy/ucb1.c b/uct/policy/ucb1.c index 8cbeb19..679ec23 100644 --- a/uct/policy/ucb1.c +++ b/uct/policy/ucb1.c @@ -19,11 +19,11 @@ struct ucb1_policy { * paper calls 'p'. Original UCB has this on 2, but this seems to * produce way too wide searches; reduce this to get deeper and * narrower readouts - try 0.2. 
*/ - float explore_p; + floating_t explore_p; /* First Play Urgency - if set to less than infinity (the MoGo paper * above reports 1.0 as the best), new branches are explored only * if none of the existing ones has higher urgency than fpu. */ - float fpu; + floating_t fpu; }; @@ -36,7 +36,7 @@ ucb1_descend(struct uct_policy *p, struct tree *tree, struct uct_descent *descen * of the explore coefficient. */ struct ucb1_policy *b = p->data; - float xpl = log(descent->node->u.playouts + descent->node->prior.playouts); + floating_t xpl = log(descent->node->u.playouts + descent->node->prior.playouts); uctd_try_node_children(tree, descent, allow_pass, parity, p->uct->tenuki_d, di, urgency) { struct tree_node *ni = di.node; @@ -58,7 +58,7 @@ ucb1_descend(struct uct_policy *p, struct tree *tree, struct uct_descent *descen } void -ucb1_update(struct uct_policy *p, struct tree *tree, struct tree_node *node, enum stone node_color, enum stone player_color, struct playout_amafmap *map, float result) +ucb1_update(struct uct_policy *p, struct tree *tree, struct tree_node *node, enum stone node_color, enum stone player_color, struct playout_amafmap *map, floating_t result) { /* It is enough to iterate by a single chain; we will * update all the preceding positions properly since diff --git a/uct/policy/ucb1amaf.c b/uct/policy/ucb1amaf.c index e3c3c1d..f0e9d56 100644 --- a/uct/policy/ucb1amaf.c +++ b/uct/policy/ucb1amaf.c @@ -19,22 +19,22 @@ struct ucb1_policy_amaf { * paper calls 'p'. Original UCB has this on 2, but this seems to * produce way too wide searches; reduce this to get deeper and * narrower readouts - try 0.2. */ - float explore_p; + floating_t explore_p; /* First Play Urgency - if set to less than infinity (the MoGo paper * above reports 1.0 as the best), new branches are explored only * if none of the existing ones has higher urgency than fpu. 
*/ - float fpu; + floating_t fpu; unsigned int equiv_rave; bool check_nakade; bool sylvain_rave; /* Coefficient of local tree values embedded in RAVE. */ - float ltree_rave; + floating_t ltree_rave; }; -static inline float fast_sqrt(unsigned int x) +static inline floating_t fast_sqrt(unsigned int x) { - static const float table[] = { + static const floating_t table[] = { 0, 1, 1.41421356237309504880, 1.73205080756887729352, 2.00000000000000000000, 2.23606797749978969640, 2.44948974278317809819, 2.64575131106459059050, @@ -75,7 +75,7 @@ static inline float fast_sqrt(unsigned int x) } #define LTREE_DEBUG if (0) -static float inline +static floating_t inline ucb1rave_evaluate(struct uct_policy *p, struct tree *tree, struct uct_descent *descent, int parity) { struct ucb1_policy_amaf *b = p->data; @@ -99,15 +99,15 @@ ucb1rave_evaluate(struct uct_policy *p, struct tree *tree, struct uct_descent *d stats_merge(&r, &l); } - float value = 0; + floating_t value = 0; if (n.playouts) { if (r.playouts) { /* At the beginning, beta is at 1 and RAVE is used. * At b->equiv_rate, beta is at 1/3 and gets steeper on. */ - float beta; + floating_t beta; if (b->sylvain_rave) { - beta = (float) r.playouts / (r.playouts + n.playouts - + (float) n.playouts * r.playouts / b->equiv_rave); + beta = (floating_t) r.playouts / (r.playouts + n.playouts + + (floating_t) n.playouts * r.playouts / b->equiv_rave); } else { /* XXX: This can be cached in descend; but we don't use this by default. 
*/ beta = sqrt(b->equiv_rave / (3 * node->parent->u.playouts + b->equiv_rave)); @@ -129,7 +129,7 @@ void ucb1rave_descend(struct uct_policy *p, struct tree *tree, struct uct_descent *descent, int parity, bool allow_pass) { struct ucb1_policy_amaf *b = p->data; - float nconf = 1.f; + floating_t nconf = 1.f; if (b->explore_p > 0) nconf = sqrt(log(descent->node->u.playouts + descent->node->prior.playouts)); @@ -153,7 +153,7 @@ ucb1rave_descend(struct uct_policy *p, struct tree *tree, struct uct_descent *de void ucb1amaf_update(struct uct_policy *p, struct tree *tree, struct tree_node *node, enum stone node_color, enum stone player_color, - struct playout_amafmap *map, float result) + struct playout_amafmap *map, floating_t result) { struct ucb1_policy_amaf *b = p->data; enum stone child_color = stone_other(node_color); @@ -195,7 +195,7 @@ ucb1amaf_update(struct uct_policy *p, struct tree *tree, struct tree_node *node, amaf_color = child_color; } - float nres = result; + floating_t nres = result; if (amaf_color != child_color) { continue; } diff --git a/uct/prior.h b/uct/prior.h index 0fb7ca3..8778027 100644 --- a/uct/prior.h +++ b/uct/prior.h @@ -25,7 +25,7 @@ struct prior_map { }; /* @value is the value, @playouts is its weight. */ -static void add_prior_value(struct prior_map *map, coord_t c, float value, int playouts); +static void add_prior_value(struct prior_map *map, coord_t c, floating_t value, int playouts); void uct_prior(struct uct *u, struct tree_node *node, struct prior_map *map); @@ -35,9 +35,9 @@ void uct_prior_done(struct uct_prior *p); static inline void -add_prior_value(struct prior_map *map, coord_t c, float value, int playouts) +add_prior_value(struct prior_map *map, coord_t c, floating_t value, int playouts) { - float v = map->parity > 0 ? value : 1 - value; + floating_t v = map->parity > 0 ? 
value : 1 - value; /* We don't need atomicity: */ struct move_stats s = { .playouts = playouts, .value = v }; stats_merge(&map->prior[c], &s); diff --git a/uct/search.c b/uct/search.c index 4c604e5..461e175 100644 --- a/uct/search.c +++ b/uct/search.c @@ -240,7 +240,7 @@ uct_search_progress(struct uct *u, struct board *b, enum stone color, && !u->pondering && di && i > s->last_dynkomi + di) { s->last_dynkomi += di; - float old_dynkomi = ctx->t->extra_komi; + floating_t old_dynkomi = ctx->t->extra_komi; ctx->t->extra_komi = u->dynkomi->permove(u->dynkomi, b, ctx->t); if (UDEBUGL(3) && old_dynkomi != ctx->t->extra_komi) fprintf(stderr, "dynkomi adjusted (%f -> %f)\n", @@ -330,7 +330,7 @@ uct_search_keep_looking(struct uct *u, struct tree *t, struct board *b, /* Do not waste time if we are winning. Spend up to worst time if * we are unsure, but only desired time if we are sure of winning. */ - float beta = 2 * (tree_node_get_value(t, 1, best->u.value) - 0.5); + floating_t beta = 2 * (tree_node_get_value(t, 1, best->u.value) - 0.5); if (ti->dim == TD_WALLTIME && beta > 0) { double good_enough = stop->desired.time * beta + stop->worst.time * (1 - beta); double elapsed = time_now() - ti->len.t.timer_start; diff --git a/uct/tree.c b/uct/tree.c index ac25fcf..797065c 100644 --- a/uct/tree.c +++ b/uct/tree.c @@ -77,7 +77,7 @@ tree_init_node(struct tree *t, coord_t coord, int depth, bool fast_alloc) /* Create a tree structure. Pre-allocate all nodes if max_tree_size is > 0. 
*/ struct tree * tree_init(struct board *board, enum stone color, unsigned long max_tree_size, - unsigned long max_pruned_size, unsigned long pruning_threshold, float ltree_aging, int hbits) + unsigned long max_pruned_size, unsigned long pruning_threshold, floating_t ltree_aging, int hbits) { struct tree *t = calloc2(1, sizeof(*t)); t->board = board; @@ -786,7 +786,7 @@ tree_promote_node(struct tree *tree, struct tree_node **node) * to recompute max_depth but it's not worth it: it's just for debugging * and soon the tree will grow and max_depth will become correct again. */ - if (tree->ltree_aging != 1.0f) { // XXX: != should work here even with the float + if (tree->ltree_aging != 1.0f) { // XXX: != should work here even with the floating_t tree_age_node(tree, tree->ltree_black); tree_age_node(tree, tree->ltree_white); } diff --git a/uct/tree.h b/uct/tree.h index 3b92b03..501de11 100644 --- a/uct/tree.h +++ b/uct/tree.h @@ -103,7 +103,7 @@ struct tree { /* The value of applied extra komi. For DYNKOMI_LINEAR, this value * is only informative, the actual value is computed per simulation * based on leaf node depth. */ - float extra_komi; + floating_t extra_komi; /* We merge local (non-tenuki) sequences for both colors, occuring * anywhere in the tree; nodes are created on-demand, special 'pass' @@ -118,7 +118,7 @@ struct tree { struct tree_node *ltree_white; // Aging factor; 2 means halve all playout values after each turn. // 1 means don't age at all. - float ltree_aging; + floating_t ltree_aging; /* Hash table used when working as slave for the distributed engine. * Maps coordinate path to tree node. */ @@ -136,7 +136,7 @@ struct tree { /* Warning: all functions below except tree_expand_node & tree_leaf_node are THREAD-UNSAFE! 
*/ struct tree *tree_init(struct board *board, enum stone color, unsigned long max_tree_size, - unsigned long max_pruned_size, unsigned long pruning_threshold, float ltree_aging, int hbits); + unsigned long max_pruned_size, unsigned long pruning_threshold, floating_t ltree_aging, int hbits); void tree_done(struct tree *tree); void tree_dump(struct tree *tree, int thres); void tree_save(struct tree *tree, struct board *b, int thres); diff --git a/uct/uct.c b/uct/uct.c index 8e74a35..02bce74 100644 --- a/uct/uct.c +++ b/uct/uct.c @@ -484,7 +484,7 @@ uct_dumptbook(struct engine *e, struct board *b, enum stone color) } -float +floating_t uct_evaluate(struct engine *e, struct board *b, struct time_info *ti, coord_t c, enum stone color) { struct uct *u = e->data; @@ -501,7 +501,7 @@ uct_evaluate(struct engine *e, struct board *b, struct time_info *ti, coord_t c, uct_prepare_move(u, &b2, color); assert(u->t); - float bestval; + floating_t bestval; uct_search(u, &b2, ti, color, u->t); struct tree_node *best = u->policy->choose(u->policy, u->t->root, &b2, color, resign); if (!best) { diff --git a/uct/uct.h b/uct/uct.h index 4a66276..c653668 100644 --- a/uct/uct.h +++ b/uct/uct.h @@ -13,6 +13,6 @@ void uct_dumptbook(struct engine *e, struct board *b, enum stone color); /* Evaluate feasibility of player @color playing at @c. Will simulate * this move for time @ti, then return 1-max(opponent_win_likelihood). 
*/ -float uct_evaluate(struct engine *e, struct board *b, struct time_info *ti, coord_t c, enum stone color); +floating_t uct_evaluate(struct engine *e, struct board *b, struct time_info *ti, coord_t c, enum stone color); #endif diff --git a/uct/walk.c b/uct/walk.c index c79ceef..2c01dda 100644 --- a/uct/walk.c +++ b/uct/walk.c @@ -369,17 +369,17 @@ uct_leaf_node(struct uct *u, struct board *b, enum stone player_color, return result; } -static float +static floating_t scale_value(struct uct *u, struct board *b, int result) { - float rval = result > 0; + floating_t rval = result > 0; if (u->val_scale) { int vp = u->val_points; if (!vp) { vp = board_size(b) - 1; vp *= vp; vp *= 2; } - float sval = (float) abs(result) / vp; + floating_t sval = (floating_t) abs(result) / vp; sval = sval > 1 ? 1 : sval; if (result < 0) sval = 1 - sval; if (u->val_extra) @@ -394,7 +394,7 @@ scale_value(struct uct *u, struct board *b, int result) static void record_local_sequence(struct uct *u, struct tree *t, struct uct_descent *descent, int dlen, int di, - enum stone seq_color, float rval) + enum stone seq_color, floating_t rval) { /* Ignore pass sequences. */ if (is_pass(descent[di].node->coord)) @@ -563,7 +563,7 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree if (passes >= 2) { /* XXX: No dead groups support. */ - float score = board_official_score(&b2, NULL); + floating_t score = board_official_score(&b2, NULL); /* Result from black's perspective (no matter who * the player; black's perspective is always * what the tree stores. 
*/ @@ -602,7 +602,7 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree assert(n == t->root || n->parent); if (result != 0) { - float rval = scale_value(u, b, result); + floating_t rval = scale_value(u, b, result); u->policy->update(u->policy, t, n, node_color, player_color, amaf, rval); if (t->use_extra_komi) { @@ -612,7 +612,7 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree if (u->local_tree && n->parent && !is_pass(n->coord) && dlen > 0) { /* Possibly transform the rval appropriately. */ - float expval = seq_value.value / seq_value.playouts; + floating_t expval = seq_value.value / seq_value.playouts; rval = stats_temper_value(rval, expval, u->local_tree); /* Get the local sequences and record them in ltree. */ diff --git a/util.h b/util.h index 6edbaa2..7e531df 100644 --- a/util.h +++ b/util.h @@ -5,6 +5,12 @@ /* Misc. definitions. */ +/* Build with "make DOUBLE=1" (which adds -Dfloating_t=double to the compiler flags) in large configurations + * with counts > 1M, where the 24 bits of float mantissa become insufficient. */ +#ifndef floating_t +# define floating_t float +#endif + #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect((x), 0) -- 2.11.4.GIT