Distributed engine: use modulo arithmetic for index in command history.
[pachi/json.git] / uct / slave.c
blob98d726a472cb3ff1a05c53a526d4decd284a8590
1 #include <assert.h>
2 #include <math.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
7 #define DEBUG
9 #include "debug.h"
10 #include "board.h"
11 #include "gtp.h"
12 #include "move.h"
13 #include "timeinfo.h"
14 #include "distributed/distributed.h"
15 #include "uct/internal.h"
16 #include "uct/search.h"
17 #include "uct/slave.h"
18 #include "uct/tree.h"
21 /* UCT infrastructure for a distributed engine slave. */
23 enum parse_code
24 uct_notify(struct engine *e, struct board *b, int id, char *cmd, char *args, char **reply)
26 struct uct *u = e->data;
28 static bool board_resized = false;
29 if (is_gamestart(cmd)) {
30 board_resized = true;
31 uct_pondering_stop(u);
34 /* Force resending the whole command history if we are out of sync
35 * but do it only once, not if already getting the history. */
36 if ((move_number(id) != b->moves || !board_resized)
37 && !reply_disabled(id) && !is_reset(cmd)) {
38 if (UDEBUGL(0))
39 fprintf(stderr, "Out of sync, id %d, move %d\n", id, b->moves);
41 /* Skip rest of multi-line command (genmoves only) */
42 if (!strcasecmp(cmd, "pachi-genmoves")
43 || !strcasecmp(cmd, "pachi-genmoves_cleanup")) {
44 char line[128];
45 while (fgets(line, sizeof(line), stdin) && *line != '\n') ;
48 static char buf[128];
49 snprintf(buf, sizeof(buf), "out of sync, move %d expected", b->moves);
50 *reply = buf;
51 return P_DONE_ERROR;
53 return reply_disabled(id) ? P_NOREPLY : P_OK;
57 /* Set mapping from coordinates to children of the root node. */
58 static void
59 find_top_nodes(struct uct *u)
61 if (!u->t || !u->t->root) return;
63 for (struct tree_node *ni = u->t->root->children; ni; ni = ni->sibling) {
64 if (!is_pass(ni->coord))
65 u->stats[ni->coord].node = ni;
69 /* Get the move stats if they are present. They are
70 * coord-sorted but the code here doesn't depend on this.
71 * Keep this code in sync with select_best_move(). */
72 static bool
73 receive_stats(struct uct *u, struct board *b)
75 char line[128];
76 while (fgets(line, sizeof(line), stdin) && *line != '\n') {
77 char move[64];
78 struct move_stats2 s;
79 if (sscanf(line, "%63s %d %f %d %f", move,
80 &s.u.playouts, &s.u.value,
81 &s.amaf.playouts, &s.amaf.value) != 5)
82 return false;
83 coord_t *c_ = str2coord(move, board_size(b));
84 coord_t c = *c_;
85 coord_done(c_);
86 assert(!is_pass(c) && !is_resign(c));
88 struct node_stats *ns = &u->stats[c];
89 if (!ns->node) find_top_nodes(u);
90 /* The node may not exist if this slave was behind
91 * but this should be rare so it is not worth creating
92 * the node here. */
93 if (!ns->node) {
94 if (DEBUGL(2))
95 fprintf(stderr, "can't find node %s %d\n", move, c);
96 continue;
99 /* The master may not send moves below a certain threshold,
100 * but if it sends one it includes the contributions from
101 * all slaves including ours (last_sent_own):
102 * received_others = received_total - last_sent_own */
103 if (ns->last_sent_own.u.playouts)
104 stats_rm_result(&s.u, ns->last_sent_own.u.value,
105 ns->last_sent_own.u.playouts);
106 if (ns->last_sent_own.amaf.playouts)
107 stats_rm_result(&s.amaf, ns->last_sent_own.amaf.value,
108 ns->last_sent_own.amaf.playouts);
110 /* others_delta = received_others - added_from_others */
111 struct move_stats2 delta = s;
112 if (ns->added_from_others.u.playouts)
113 stats_rm_result(&delta.u, ns->added_from_others.u.value,
114 ns->added_from_others.u.playouts);
115 /* delta may be <= 0 if some slaves stopped sending this move
116 * because it became below a playouts threshold. In this case
117 * we just keep the old stats in our tree. */
118 if (delta.u.playouts <= 0) continue;
120 if (ns->added_from_others.amaf.playouts)
121 stats_rm_result(&delta.amaf, ns->added_from_others.amaf.value,
122 ns->added_from_others.amaf.playouts);
124 stats_add_result(&ns->node->u, delta.u.value, delta.u.playouts);
125 stats_add_result(&ns->node->amaf, delta.amaf.value, delta.amaf.playouts);
126 ns->added_from_others = s;
128 return true;
131 /* Get stats updates for the distributed engine. Return a buffer with
132 * one line "played_own root_playouts threads keep_looking" then a list
133 * of lines "coord playouts value amaf_playouts amaf_value".
134 * The last line must not end with \n.
135 * If c is pass or resign, add this move with root->playouts weight.
136 * This function is called only by the main thread, but may be
137 * called while the tree is updated by the worker threads.
138 * Keep this code in sync with select_best_move(). */
139 static char *
140 report_stats(struct uct *u, struct board *b, coord_t c, bool keep_looking)
142 static char reply[10240];
143 char *r = reply;
144 char *end = reply + sizeof(reply);
145 struct tree_node *root = u->t->root;
146 r += snprintf(r, end - r, "%d %d %d %d", u->played_own, root->u.playouts, u->threads, keep_looking);
147 int min_playouts = root->u.playouts / 100;
149 /* Give a large weight to pass or resign, but still allow other moves.
150 * Only root->u.playouts will be used (majority vote) but send amaf
151 * stats too for consistency. */
152 if (is_pass(c) || is_resign(c))
153 r += snprintf(r, end - r, "\n%s %d %.1f %d %.1f", coord2sstr(c, b),
154 root->u.playouts, 0.0, root->amaf.playouts, 0.0);
156 /* We rely on the fact that root->children is set only
157 * after all children are created. */
158 for (struct tree_node *ni = root->children; ni; ni = ni->sibling) {
160 if (is_pass(ni->coord)) continue;
161 struct node_stats *ns = &u->stats[ni->coord];
162 ns->last_sent_own.u.playouts = ns->last_sent_own.amaf.playouts = 0;
163 ns->node = ni;
164 if (ni->u.playouts <= min_playouts || ni->hints & TREE_HINT_INVALID)
165 continue;
167 char *coord = coord2sstr(ni->coord, b);
168 /* We return the values as stored in the tree, so from black's view.
169 * own = total_in_tree - added_from_others */
170 struct move_stats2 s = { .u = ni->u, .amaf = ni->amaf };
171 struct move_stats2 others = ns->added_from_others;
172 if (s.u.playouts - others.u.playouts <= min_playouts)
173 continue;
174 if (others.u.playouts)
175 stats_rm_result(&s.u, others.u.value, others.u.playouts);
176 if (others.amaf.playouts)
177 stats_rm_result(&s.amaf, others.amaf.value, others.amaf.playouts);
179 r += snprintf(r, end - r, "\n%s %d %.7f %d %.7f", coord,
180 s.u.playouts, s.u.value, s.amaf.playouts, s.amaf.value);
181 ns->last_sent_own = s;
182 /* If the master discards these values because this slave
183 * is out of sync, u->stats will be reset anyway. */
185 return reply;
188 /* genmoves is issued by the distributed engine master to all slaves, to:
189 * 1. Start a MCTS search if not running yet
190 * 2. Report current move statistics of the on-going search.
191 * The MCTS search is left running on the background when uct_genmoves()
192 * returns. It is stopped by receiving a play GTP command, triggering
193 * uct_pondering_stop(). */
194 /* genmoves gets in the args parameter
195 * "played_games main_time byoyomi_time byoyomi_periods byoyomi_stones"
196 * and reads a list of lines "coord playouts value amaf_playouts amaf_value"
197 * to get stats of other slaves, except for the first call at a given move number.
198 * See uct_getstats() for the description of the return value. */
199 char *
200 uct_genmoves(struct engine *e, struct board *b, struct time_info *ti, enum stone color,
201 char *args, bool pass_all_alive)
203 struct uct *u = e->data;
204 assert(u->slave);
206 /* Prepare the state if the search is not already running.
207 * We must do this first since we tweak the state below
208 * based on instructions from the master. */
209 if (!thread_manager_running)
210 uct_genmove_setup(u, b, color);
212 /* Get playouts and time information from master.
213 * Keep this code in sync with distributed_genmove(). */
214 if ((ti->dim == TD_WALLTIME
215 && sscanf(args, "%d %lf %lf %d %d", &u->played_all, &ti->len.t.main_time,
216 &ti->len.t.byoyomi_time, &ti->len.t.byoyomi_periods,
217 &ti->len.t.byoyomi_stones) != 5)
219 || (ti->dim == TD_GAMES && sscanf(args, "%d", &u->played_all) != 1)) {
220 return NULL;
223 if (!receive_stats(u, b))
224 return NULL;
226 static struct uct_search_state s;
227 if (!thread_manager_running) {
228 /* This is the first genmoves issue, start the MCTS. */
229 memset(&s, 0, sizeof(s));
230 uct_search_start(u, b, color, u->t, ti, &s);
232 /* Wait a bit to populate the statistics
233 * and avoid a busy loop with the master. */
234 time_sleep(TREE_BUSYWAIT_INTERVAL);
236 /* Check the state of the Monte Carlo Tree Search. */
238 int played_games = uct_search_games(&s);
239 uct_search_progress(u, b, color, u->t, ti, &s, played_games);
240 u->played_own = played_games;
242 bool keep_looking = !uct_search_check_stop(u, b, color, u->t, ti, &s, played_games);
243 coord_t best_coord;
244 uct_search_result(u, b, color, pass_all_alive, played_games, s.base_playouts, &best_coord);
246 char *reply = report_stats(u, b, best_coord, keep_looking);
247 return reply;