Distributed engine: fix check for resending same command.
[pachi/ann.git] / uct / slave.c
blob862968ad8349382830150a1e87f500861c26175b
1 #include <assert.h>
2 #include <math.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
7 #define DEBUG
9 #include "debug.h"
10 #include "board.h"
11 #include "gtp.h"
12 #include "move.h"
13 #include "timeinfo.h"
14 #include "distributed/distributed.h"
15 #include "uct/internal.h"
16 #include "uct/search.h"
17 #include "uct/slave.h"
18 #include "uct/tree.h"
21 /* UCT infrastructure for a distributed engine slave. */
23 enum parse_code
24 uct_notify(struct engine *e, struct board *b, int id, char *cmd, char *args, char **reply)
26 struct uct *u = e->data;
28 static bool board_resized = false;
29 board_resized |= is_gamestart(cmd);
31 /* Force resending the whole command history if we are out of sync
32 * but do it only once, not if already getting the history. */
33 if ((move_number(id) != b->moves || !board_resized)
34 && !reply_disabled(id) && !is_reset(cmd)) {
35 if (UDEBUGL(0))
36 fprintf(stderr, "Out of sync, id %d, move %d\n", id, b->moves);
37 static char buf[128];
38 snprintf(buf, sizeof(buf), "out of sync, move %d expected", b->moves);
39 *reply = buf;
40 return P_DONE_ERROR;
42 return reply_disabled(id) ? P_NOREPLY : P_OK;
46 /* Set mapping from coordinates to children of the root node. */
47 static void
48 find_top_nodes(struct uct *u)
50 if (!u->t || !u->t->root) return;
52 for (struct tree_node *ni = u->t->root->children; ni; ni = ni->sibling) {
53 if (!is_pass(ni->coord))
54 u->stats[ni->coord].node = ni;
58 /* Get the move stats if they are present. They are
59 * coord-sorted but the code here doesn't depend on this.
60 * Keep this code in sync with select_best_move(). */
61 static bool
62 receive_stats(struct uct *u, struct board *b)
64 char line[128];
65 while (fgets(line, sizeof(line), stdin) && *line != '\n') {
66 char move[64];
67 struct move_stats2 s;
68 if (sscanf(line, "%63s %d %f %d %f", move,
69 &s.u.playouts, &s.u.value,
70 &s.amaf.playouts, &s.amaf.value) != 5)
71 return false;
72 coord_t *c_ = str2coord(move, board_size(b));
73 coord_t c = *c_;
74 coord_done(c_);
75 assert(!is_pass(c) && !is_resign(c));
77 struct node_stats *ns = &u->stats[c];
78 if (!ns->node) find_top_nodes(u);
79 /* The node may not exist if this slave was behind
80 * but this should be rare so it is not worth creating
81 * the node here. */
82 if (!ns->node) {
83 if (DEBUGL(2))
84 fprintf(stderr, "can't find node %s %d\n", move, c);
85 continue;
88 /* The master may not send moves below a certain threshold,
89 * but if it sends one it includes the contributions from
90 * all slaves including ours (last_sent_own):
91 * received_others = received_total - last_sent_own */
92 if (ns->last_sent_own.u.playouts)
93 stats_rm_result(&s.u, ns->last_sent_own.u.value,
94 ns->last_sent_own.u.playouts);
95 if (ns->last_sent_own.amaf.playouts)
96 stats_rm_result(&s.amaf, ns->last_sent_own.amaf.value,
97 ns->last_sent_own.amaf.playouts);
99 /* others_delta = received_others - added_from_others */
100 struct move_stats2 delta = s;
101 if (ns->added_from_others.u.playouts)
102 stats_rm_result(&delta.u, ns->added_from_others.u.value,
103 ns->added_from_others.u.playouts);
104 /* delta may be <= 0 if some slaves stopped sending this move
105 * because it became below a playouts threshold. In this case
106 * we just keep the old stats in our tree. */
107 if (delta.u.playouts <= 0) continue;
109 if (ns->added_from_others.amaf.playouts)
110 stats_rm_result(&delta.amaf, ns->added_from_others.amaf.value,
111 ns->added_from_others.amaf.playouts);
113 stats_add_result(&ns->node->u, delta.u.value, delta.u.playouts);
114 stats_add_result(&ns->node->amaf, delta.amaf.value, delta.amaf.playouts);
115 ns->added_from_others = s;
117 return true;
120 /* Get stats updates for the distributed engine. Return a buffer with
121 * one line "played_own root_playouts threads keep_looking" then a list
122 * of lines "coord playouts value amaf_playouts amaf_value".
123 * The last line must not end with \n.
124 * If c is pass or resign, add this move with root->playouts weight.
125 * This function is called only by the main thread, but may be
126 * called while the tree is updated by the worker threads.
127 * Keep this code in sync with select_best_move(). */
128 static char *
129 report_stats(struct uct *u, struct board *b, coord_t c, bool keep_looking)
131 static char reply[10240];
132 char *r = reply;
133 char *end = reply + sizeof(reply);
134 struct tree_node *root = u->t->root;
135 r += snprintf(r, end - r, "%d %d %d %d", u->played_own, root->u.playouts, u->threads, keep_looking);
136 int min_playouts = root->u.playouts / 100;
138 /* Give a large weight to pass or resign, but still allow other moves.
139 * Only root->u.playouts will be used (majority vote) but send amaf
140 * stats too for consistency. */
141 if (is_pass(c) || is_resign(c))
142 r += snprintf(r, end - r, "\n%s %d %.1f %d %.1f", coord2sstr(c, b),
143 root->u.playouts, 0.0, root->amaf.playouts, 0.0);
145 /* We rely on the fact that root->children is set only
146 * after all children are created. */
147 for (struct tree_node *ni = root->children; ni; ni = ni->sibling) {
149 if (is_pass(ni->coord)) continue;
150 struct node_stats *ns = &u->stats[ni->coord];
151 ns->last_sent_own.u.playouts = ns->last_sent_own.amaf.playouts = 0;
152 ns->node = ni;
153 if (ni->u.playouts <= min_playouts || ni->hints & TREE_HINT_INVALID)
154 continue;
156 char *coord = coord2sstr(ni->coord, b);
157 /* We return the values as stored in the tree, so from black's view.
158 * own = total_in_tree - added_from_others */
159 struct move_stats2 s = { .u = ni->u, .amaf = ni->amaf };
160 struct move_stats2 others = ns->added_from_others;
161 if (s.u.playouts - others.u.playouts <= min_playouts)
162 continue;
163 if (others.u.playouts)
164 stats_rm_result(&s.u, others.u.value, others.u.playouts);
165 if (others.amaf.playouts)
166 stats_rm_result(&s.amaf, others.amaf.value, others.amaf.playouts);
168 r += snprintf(r, end - r, "\n%s %d %.7f %d %.7f", coord,
169 s.u.playouts, s.u.value, s.amaf.playouts, s.amaf.value);
170 ns->last_sent_own = s;
171 /* If the master discards these values because this slave
172 * is out of sync, u->stats will be reset anyway. */
174 return reply;
177 /* genmoves is issued by the distributed engine master to all slaves, to:
178 * 1. Start a MCTS search if not running yet
179 * 2. Report current move statistics of the on-going search.
180 * The MCTS search is left running on the background when uct_genmoves()
181 * returns. It is stopped by receiving a play GTP command, triggering
182 * uct_pondering_stop(). */
183 /* genmoves gets in the args parameter
184 * "played_games main_time byoyomi_time byoyomi_periods byoyomi_stones"
185 * and reads a list of lines "coord playouts value amaf_playouts amaf_value"
186 * to get stats of other slaves, except for the first call at a given move number.
187 * See uct_getstats() for the description of the return value. */
188 char *
189 uct_genmoves(struct engine *e, struct board *b, struct time_info *ti, enum stone color,
190 char *args, bool pass_all_alive)
192 struct uct *u = e->data;
193 assert(u->slave);
195 /* Prepare the state if the search is not already running.
196 * We must do this first since we tweak the state below
197 * based on instructions from the master. */
198 if (!thread_manager_running)
199 uct_genmove_setup(u, b, color);
201 /* Get playouts and time information from master.
202 * Keep this code in sync with distributed_genmove(). */
203 if ((ti->dim == TD_WALLTIME
204 && sscanf(args, "%d %lf %lf %d %d", &u->played_all, &ti->len.t.main_time,
205 &ti->len.t.byoyomi_time, &ti->len.t.byoyomi_periods,
206 &ti->len.t.byoyomi_stones) != 5)
208 || (ti->dim == TD_GAMES && sscanf(args, "%d", &u->played_all) != 1)) {
209 return NULL;
212 if (!receive_stats(u, b))
213 return NULL;
215 static struct uct_search_state s;
216 if (!thread_manager_running) {
217 /* This is the first genmoves issue, start the MCTS. */
218 memset(&s, 0, sizeof(s));
219 uct_search_start(u, b, color, u->t, ti, &s);
221 /* Wait a bit to populate the statistics
222 * and avoid a busy loop with the master. */
223 time_sleep(TREE_BUSYWAIT_INTERVAL);
225 /* Check the state of the Monte Carlo Tree Search. */
227 int played_games = uct_search_games(&s);
228 uct_search_progress(u, b, color, u->t, ti, &s, played_games);
229 u->played_own = played_games;
231 bool keep_looking = !uct_search_check_stop(u, b, color, u->t, ti, &s, played_games);
232 coord_t best_coord;
233 uct_search_result(u, b, color, pass_all_alive, played_games, s.base_playouts, &best_coord);
235 char *reply = report_stats(u, b, best_coord, keep_looking);
236 return reply;