From 5522f79ae888ac0353804ccf55d2ada910a1bd6d Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Sat, 7 Feb 2009 23:32:58 +0100 Subject: [PATCH] UCB1AMAF: Implement exploration coefficient for srave --- uct/policy/ucb1amaf.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 115 insertions(+), 3 deletions(-) diff --git a/uct/policy/ucb1amaf.c b/uct/policy/ucb1amaf.c index f46aea1..12be8af 100644 --- a/uct/policy/ucb1amaf.c +++ b/uct/policy/ucb1amaf.c @@ -81,12 +81,113 @@ ucb1orave_descend(struct uct_policy *p, struct tree *tree, struct tree_node *nod return nbest; } +float fast_sqrt(int x) /* Returns the square root of x: table lookup when x indexes the table, sqrt() call otherwise. */ +{ + static const float table[] = { /* table[i] holds sqrt(i); only the entries ahead of the #if 0 (indices 0..4) are compiled in */ + 0, + 1, + 1.41421356237309504880, + 1.73205080756887729352, + 2.00000000000000000000, +#if 0 + 2.23606797749978969640, + 2.44948974278317809819, + 2.64575131106459059050, + 2.82842712474619009760, + 3.00000000000000000000, + 3.16227766016837933199, + 3.31662479035539984911, + 3.46410161513775458705, + 3.60555127546398929311, + 3.74165738677394138558, + 3.87298334620741688517, + 4.00000000000000000000, + 4.12310562561766054982, + 4.24264068711928514640, + 4.35889894354067355223, + 4.47213595499957939281, + 4.58257569495584000658, + 4.69041575982342955456, + 4.79583152331271954159, + 4.89897948556635619639, + 5.00000000000000000000, + 5.09901951359278483002, + 5.19615242270663188058, + 5.29150262212918118100, + 5.38516480713450403125, + 5.47722557505166113456, + 5.56776436283002192211, + 5.65685424949238019520, + 5.74456264653802865985, + 5.83095189484530047087, + 5.91607978309961604256, + 6.00000000000000000000, + 6.08276253029821968899, + 6.16441400296897645025, + 6.24499799839839820584, + 6.32455532033675866399, + 6.40312423743284868648, + 6.48074069840786023096, + 6.55743852430200065234, + 6.63324958071079969822, + 6.70820393249936908922, + 6.78232998312526813906, + 6.85565460040104412493, + 6.92820323027550917410, + 7.00000000000000000000, + 7.07106781186547524400, + 7.14142842854284999799, + 7.21110255092797858623, + 
7.28010988928051827109, + 7.34846922834953429459, + 7.41619848709566294871, + 7.48331477354788277116, + 7.54983443527074969723, + 7.61577310586390828566, + 7.68114574786860817576, + 7.74596669241483377035, + 7.81024967590665439412, + 7.87400787401181101968, + 7.93725393319377177150, +#endif + }; + //printf("sqrt %d\n", x); + if (x < sizeof(table) / sizeof(*table)) { /* NOTE(review): compares int x against a size_t value; a negative x would presumably be converted to unsigned and reach the sqrt() branch - confirm callers never pass x < 0 */ + return table[x]; + } else { + return sqrt(x); /* x does not index the compiled-in table */ +#if 0 /* disabled integer square-root code; it sits after the return above */ + int y = 0; + int base = 1 << (sizeof(int) * 8 - 2); + if ((x & 0xFFFF0000) == 0) base >>= 16; + if ((x & 0xFF00FF00) == 0) base >>= 8; + if ((x & 0xF0F0F0F0) == 0) base >>= 4; + if ((x & 0xCCCCCCCC) == 0) base >>= 2; + // "base" starts at the highest power of four <= the argument. + + while (base > 0) { + if (x >= y + base) { + x -= y + base; + y += base << 1; + } + y >>= 1; + base >>= 2; + } + printf("sqrt %d = %d\n", x, y); + return y; +#endif + } +} + /* Sylvain RAVE function */ struct tree_node * ucb1srave_descend(struct uct_policy *p, struct tree *tree, struct tree_node *node, int parity, bool allow_pass) { struct ucb1_policy_amaf *b = p->data; float rave_coef = 1.0f / b->equiv_rave; + float conf = 1.f; /* multiplier for the exploration bonuses below; stays 1 unless one of the coefficients is positive */ + if (b->explore_p > 0 || b->explore_p_rave > 0) + conf = sqrt(log(node->u.playouts + node->prior.playouts)); /* NOTE(review): log() of a playout sum of 0 is not finite - confirm the sum is always positive here */ struct tree_node *nbest = node->children; float best_urgency = -9999; @@ -105,6 +206,17 @@ ucb1srave_descend(struct uct_policy *p, struct tree *tree, struct tree_node *nod nwins = ngames - nwins; rwins = rgames - rwins; } + float nval = 0, rval = 0; /* nval comes from nwins/ngames, rval from rwins/rgames; each stays 0 when its game count is 0 */ + if (ngames) { + nval = (float) nwins / ngames; + if (b->explore_p > 0) + nval += b->explore_p * conf / fast_sqrt(ngames); /* bonus explore_p * conf / sqrt(ngames); the enclosing if keeps ngames non-zero */ + } + if (rgames) { + rval = (float) rwins / rgames; + if (b->explore_p_rave > 0) + rval += b->explore_p_rave * conf / fast_sqrt(rgames); /* same bonus form with explore_p_rave and rgames; the enclosing if keeps rgames non-zero */ + } float urgency; if (ngames) { @@ -116,12 +228,12 @@ ucb1srave_descend(struct uct_policy *p, struct tree *tree, struct tree_node *nod fprintf(stderr, "[beta %f = %d / (%d + %d + %f)]\n", beta, rgames, rgames, ngames, rave_coef * 
ngames * rgames); #endif - urgency = beta * rwins / rgames + (1 - beta) * nwins / ngames; + urgency = beta * rval + (1 - beta) * nval; /* beta-weighted mix of the two values computed above, bonuses included */ } else { - urgency = (float) nwins / ngames; + urgency = nval; } } else if (rgames) { - urgency = (float) rwins / rgames; + urgency = rval; } else { urgency = b->fpu; } -- 2.11.4.GIT