From 87785c28c9866b8affb1f96a0aaaaf9a89be9a6d Mon Sep 17 00:00:00 2001 From: Jean-loup Gailly Date: Fri, 15 Jul 2011 18:59:56 +0200 Subject: [PATCH] Distributed engine: introduce virtual_win, root_virtual_win, vwin_min_playouts Encourage different slaves to work on different parts of the tree by adding virtual wins to different nodes. Higher bias can be given to children of the root node with root_virtual_win. This improves scalability to more slaves (about 40 elo gain for 64 slaves). --- uct/internal.h | 2 ++ uct/policy/ucb1amaf.c | 29 ++++++++++++++++++++++++++++- uct/uct.c | 8 ++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/uct/internal.h b/uct/internal.h index 2aefd17..3dfde19 100644 --- a/uct/internal.h +++ b/uct/internal.h @@ -55,6 +55,8 @@ struct uct { bool pondering_opt; /* User wants pondering */ bool pondering; /* Actually pondering now */ bool slave; /* Act as slave in distributed engine. */ + int max_slaves; /* Optional, -1 if not set */ + int slave_index; /* 0..max_slaves-1, or -1 if not set */ enum stone my_color; int fuseki_end; diff --git a/uct/policy/ucb1amaf.c b/uct/policy/ucb1amaf.c index 9f6686e..41402d4 100644 --- a/uct/policy/ucb1amaf.c +++ b/uct/policy/ucb1amaf.c @@ -20,6 +20,11 @@ struct ucb1_policy_amaf { * produce way too wide searches; reduce this to get deeper and * narrower readouts - try 0.2. */ floating_t explore_p; + /* In distributed mode, encourage different slaves to work on different + * parts of the tree by adding virtual wins to different nodes. */ + int virtual_win; + int root_virtual_win; + int vwin_min_playouts; /* First Play Urgency - if set to less than infinity (the MoGo paper * above reports 1.0 as the best), new branches are explored only * if none of the existing ones has higher urgency than fpu. */ @@ -155,11 +160,22 @@ ucb1rave_descend(struct uct_policy *p, struct tree *tree, struct uct_descent *de floating_t nconf = 1.f; if (b->explore_p > 0) nconf = sqrt(log(descent->node->u.playouts + descent->node->prior.playouts)); + struct uct *u = p->uct; + int vwin = 0; + if (u->max_slaves > 0 && u->slave_index >= 0) + vwin = descent->node == tree->root ? b->root_virtual_win : b->virtual_win; + int child = 0; - uctd_try_node_children(tree, descent, allow_pass, parity, p->uct->tenuki_d, di, urgency) { + uctd_try_node_children(tree, descent, allow_pass, parity, u->tenuki_d, di, urgency) { struct tree_node *ni = di.node; urgency = ucb1rave_evaluate(p, tree, &di, parity); + /* In distributed mode, encourage different slaves to work on different + * parts of the tree. We rely on the fact that children (if they exist) + * are the same and in the same order in all slaves. */ + if (vwin > 0 && ni->u.playouts > b->vwin_min_playouts && (child - u->slave_index) % u->max_slaves == 0) + urgency += vwin / (ni->u.playouts + vwin); + if (ni->u.playouts > 0 && b->explore_p > 0) { urgency += b->explore_p * nconf / fast_sqrt(ni->u.playouts); @@ -281,6 +297,9 @@ policy_ucb1amaf_init(struct uct *u, char *arg) b->crit_negative = 1; b->crit_amaf = 0; + b->root_virtual_win = -1; + b->vwin_min_playouts = 1000; + if (arg) { char *optspec, *next = arg; while (*next) { @@ -312,6 +331,12 @@ policy_ucb1amaf_init(struct uct *u, char *arg) b->crit_negative = !optval || *optval == '1'; } else if (!strcasecmp(optname, "crit_amaf")) { b->crit_amaf = !optval || *optval == '1'; + } else if (!strcasecmp(optname, "virtual_win") && optval) { + b->virtual_win = atoi(optval); + } else if (!strcasecmp(optname, "root_virtual_win") && optval) { + b->root_virtual_win = atoi(optval); + } else if (!strcasecmp(optname, "vwin_min_playouts") && optval) { + b->vwin_min_playouts = atoi(optval); } else { fprintf(stderr, "ucb1amaf: Invalid policy argument %s or missing value\n", optname); @@ -319,6 +344,8 @@ policy_ucb1amaf_init(struct uct *u, char *arg) } } } + if (b->root_virtual_win < 0) + b->root_virtual_win = b->virtual_win; return p; } diff --git a/uct/uct.c b/uct/uct.c index 9234e0b..ce32a3f 100644 --- a/uct/uct.c +++ b/uct/uct.c @@ -568,6 +568,8 @@ uct_state_init(char *arg, struct board *b) u->local_tree_rootgoal = true; u->local_tree_neival = true; + u->max_slaves = -1; + u->slave_index = -1; u->stats_delay = 0.01; // 10 ms u->plugins = pluginset_init(b); @@ -941,6 +943,12 @@ uct_state_init(char *arg, struct board *b) } else if (!strcasecmp(optname, "slave")) { /* Act as slave for the distributed engine. */ u->slave = !optval || atoi(optval); + } else if (!strcasecmp(optname, "slave_index") && optval) { + /* Optional index if per-slave behavior is desired. + * Must be given as index/max */ + u->slave_index = atoi(optval); + char *p = strchr(optval, '/'); + if (p) u->max_slaves = atoi(++p); } else if (!strcasecmp(optname, "shared_nodes") && optval) { /* Share at most shared_nodes between master and slave at each genmoves. * Must use the same value in master and slaves. */ -- 2.11.4.GIT