From 4598d66d225fe8f64b8c6e59d1d1e0e7fe115d76 Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Wed, 30 Sep 2009 14:36:54 +0200 Subject: [PATCH] UCB1AMAF: Remove old cruft - orave, some unused sqrt() optimizations --- uct/policy/ucb1amaf.c | 124 -------------------------------------------------- 1 file changed, 124 deletions(-) diff --git a/uct/policy/ucb1amaf.c b/uct/policy/ucb1amaf.c index 989a1fa..0b20f22 100644 --- a/uct/policy/ucb1amaf.c +++ b/uct/policy/ucb1amaf.c @@ -36,47 +36,6 @@ struct tree_node *ucb1_choose(struct uct_policy *p, struct tree_node *node, stru struct tree_node *ucb1_descend(struct uct_policy *p, struct tree *tree, struct tree_node *node, int parity, bool allow_pass); -/* Original RAVE function */ -struct tree_node * -ucb1orave_descend(struct uct_policy *p, struct tree *tree, struct tree_node *node, int parity, bool allow_pass) -{ - /* We want to count in the prior stats here after all. Otherwise, - * nodes with positive prior will get explored _LESS_ since the - * urgency will be always higher; even with normal FPU because - * of the explore coefficient. */ - - struct ucb1_policy_amaf *b = p->data; - float xpl = log(node->u.playouts + node->prior.playouts) * b->explore_p; - float xpl_rave = log(node->amaf.playouts + (b->rave_prior ? node->prior.playouts : 0)) * b->explore_p_rave; - float beta = sqrt((float)b->equiv_rave / (3 * (node->u.playouts + node->prior.playouts) + b->equiv_rave)); - - struct tree_node *nbest = node->children; - float best_urgency = -9999; - for (struct tree_node *ni = node->children; ni; ni = ni->sibling) { - /* Do not consider passing early. */ - if (likely(!allow_pass) && unlikely(is_pass(ni->coord))) - continue; - int amaf_wins = ni->amaf.wins + (b->rave_prior ? ni->prior.wins : 0); - int amaf_playouts = ni->amaf.playouts + (b->rave_prior ? ni->prior.playouts : 0); - int uct_playouts = ni->u.playouts + ni->prior.playouts; - ni->amaf.value = (float)amaf_wins / amaf_playouts; - ni->prior.value = (float)ni->prior.wins / ni->prior.playouts; - float uctp = tree_node_get_value(tree, ni, u, parity) + sqrt(xpl / uct_playouts); - float ravep = tree_node_get_value(tree, ni, amaf, parity) + sqrt(xpl_rave / amaf_playouts); - float urgency = ni->u.playouts ? beta * ravep + (1 - beta) * uctp : b->fpu; - // fprintf(stderr, "uctp %f (uct %d/%d) ravep %f (xpl %f amaf %d/%d) beta %f => %f\n", uctp, ni->u.wins, ni->u.playouts, ravep, xpl_rave, amaf_wins, amaf_playouts, beta, urgency); - if (b->urg_randoma) - urgency += (float)(fast_random(b->urg_randoma) - b->urg_randoma / 2) / 1000; - if (b->urg_randomm) - urgency *= (float)(fast_random(b->urg_randomm) + 5) / b->urg_randomm; - if (urgency > best_urgency) { - best_urgency = urgency; - nbest = ni; - } - } - return nbest; -} - float fast_sqrt(int x) { static const float table[] = { @@ -85,93 +44,12 @@ float fast_sqrt(int x) 1.41421356237309504880, 1.73205080756887729352, 2.00000000000000000000, -#if 0 - 2.23606797749978969640, - 2.44948974278317809819, - 2.64575131106459059050, - 2.82842712474619009760, - 3.00000000000000000000, - 3.16227766016837933199, - 3.31662479035539984911, - 3.46410161513775458705, - 3.60555127546398929311, - 3.74165738677394138558, - 3.87298334620741688517, - 4.00000000000000000000, - 4.12310562561766054982, - 4.24264068711928514640, - 4.35889894354067355223, - 4.47213595499957939281, - 4.58257569495584000658, - 4.69041575982342955456, - 4.79583152331271954159, - 4.89897948556635619639, - 5.00000000000000000000, - 5.09901951359278483002, - 5.19615242270663188058, - 5.29150262212918118100, - 5.38516480713450403125, - 5.47722557505166113456, - 5.56776436283002192211, - 5.65685424949238019520, - 5.74456264653802865985, - 5.83095189484530047087, - 5.91607978309961604256, - 6.00000000000000000000, - 6.08276253029821968899, - 6.16441400296897645025, - 6.24499799839839820584, - 6.32455532033675866399, - 6.40312423743284868648, - 6.48074069840786023096, - 6.55743852430200065234, - 6.63324958071079969822, - 6.70820393249936908922, - 6.78232998312526813906, - 6.85565460040104412493, - 6.92820323027550917410, - 7.00000000000000000000, - 7.07106781186547524400, - 7.14142842854284999799, - 7.21110255092797858623, - 7.28010988928051827109, - 7.34846922834953429459, - 7.41619848709566294871, - 7.48331477354788277116, - 7.54983443527074969723, - 7.61577310586390828566, - 7.68114574786860817576, - 7.74596669241483377035, - 7.81024967590665439412, - 7.87400787401181101968, - 7.93725393319377177150, -#endif }; //printf("sqrt %d\n", x); if (x < sizeof(table) / sizeof(*table)) { return table[x]; } else { return sqrt(x); -#if 0 - int y = 0; - int base = 1 << (sizeof(int) * 8 - 2); - if ((x & 0xFFFF0000) == 0) base >>= 16; - if ((x & 0xFF00FF00) == 0) base >>= 8; - if ((x & 0xF0F0F0F0) == 0) base >>= 4; - if ((x & 0xCCCCCCCC) == 0) base >>= 2; - // "base" starts at the highest power of four <= the argument. - - while (base > 0) { - if (x >= y + base) { - x -= y + base; - y += base << 1; - } - y >>= 1; - base >>= 2; - } - printf("sqrt %d = %d\n", x, y); - return y; -#endif } } @@ -421,8 +299,6 @@ policy_ucb1amaf_init(struct uct *u, char *arg) } else if (!strcasecmp(optname, "rave")) { if (optval && *optval == '0') p->descend = ucb1_descend; - else if (optval && *optval == 'o') - p->descend = ucb1orave_descend; else if (optval && *optval == 's') p->descend = ucb1srave_descend; } else if (!strcasecmp(optname, "explore_p_rave") && optval) { -- 2.11.4.GIT