Distributed engine: the slave now receives stats in binary form.
[pachi.git] / uct / slave.c
blobb1df4c2111e83cb5ab983023732fd3bfc7a4610e
1 #include <assert.h>
2 #include <math.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
7 #define MAX_VERBOSE_LOGS 1000
8 #define DEBUG
10 #include "debug.h"
11 #include "board.h"
12 #include "gtp.h"
13 #include "move.h"
14 #include "timeinfo.h"
15 #include "distributed/distributed.h"
16 #include "uct/internal.h"
17 #include "uct/search.h"
18 #include "uct/slave.h"
19 #include "uct/tree.h"
22 /* UCT infrastructure for a distributed engine slave. */
24 enum parse_code
25 uct_notify(struct engine *e, struct board *b, int id, char *cmd, char *args, char **reply)
27 struct uct *u = e->data;
29 static bool board_resized = false;
30 if (is_gamestart(cmd)) {
31 board_resized = true;
32 uct_pondering_stop(u);
35 /* Force resending the whole command history if we are out of sync
36 * but do it only once, not if already getting the history. */
37 if ((move_number(id) != b->moves || !board_resized)
38 && !reply_disabled(id) && !is_reset(cmd)) {
39 if (UDEBUGL(0))
40 fprintf(stderr, "Out of sync, id %d, move %d\n", id, b->moves);
42 /* Skip rest of multi-line command (genmoves only) */
43 if (!strcasecmp(cmd, "pachi-genmoves")
44 || !strcasecmp(cmd, "pachi-genmoves_cleanup")) {
45 char line[128];
46 while (fgets(line, sizeof(line), stdin) && *line != '\n') ;
49 static char buf[128];
50 snprintf(buf, sizeof(buf), "out of sync, move %d expected", b->moves);
51 *reply = buf;
52 return P_DONE_ERROR;
54 return reply_disabled(id) ? P_NOREPLY : P_OK;
58 /* Read the move stats sent by the master, as a binary array of
59 * incr_stats structs. The stats come sorted by increasing coord path.
60 * To simplify the code, we assume that master and slave have the same
61 * architecture (store values identically).
62 * Keep this code in sync with distributed/distributed.c:select_best_move().
63 * Return true if ok, false if error. */
64 static bool
65 receive_stats(struct uct *u, int size)
67 if (size % sizeof(struct incr_stats)) return false;
68 int nodes = size / sizeof(struct incr_stats);
70 assert(nodes);
71 double start_time = time_now();
73 for (int n = 0; n < nodes; n++) {
74 struct incr_stats is;
75 if (fread(&is, sizeof(struct incr_stats), 1, stdin) != 1)
76 return false;
78 if (UDEBUGL(7))
79 fprintf(stderr, "read %5d/%d %6d %.3f %"PRIpath"\n", n, nodes,
80 is.incr.playouts, is.incr.value, is.coord_path);
82 /* TODO: update the stats in the tree. */
84 if (DEBUGVV(2))
85 fprintf(stderr, "read args for %d nodes in %.4fms\n", nodes,
86 (time_now() - start_time)*1000);
87 return true;
90 /* Get stats updates for the distributed engine. Return a buffer with
91 * one line "played_own root_playouts threads keep_looking" then a list
92 * of lines "coord playouts value amaf_playouts amaf_value".
93 * The last line must not end with \n.
94 * If c is pass or resign, add this move with root->playouts weight.
95 * This function is called only by the main thread, but may be
96 * called while the tree is updated by the worker threads.
97 * Keep this code in sync with select_best_move(). */
98 static char *
99 report_stats(struct uct *u, struct board *b, coord_t c, bool keep_looking)
101 static char reply[10240];
102 char *r = reply;
103 char *end = reply + sizeof(reply);
104 struct tree_node *root = u->t->root;
105 r += snprintf(r, end - r, "%d %d %d %d", u->played_own, root->u.playouts, u->threads, keep_looking);
106 int min_playouts = root->u.playouts / 100;
108 /* Give a large weight to pass or resign, but still allow other moves.
109 * Only root->u.playouts will be used (majority vote) but send amaf
110 * stats too for consistency. */
111 if (is_pass(c) || is_resign(c))
112 r += snprintf(r, end - r, "\n%s %d %.1f %d %.1f", coord2sstr(c, b),
113 root->u.playouts, 0.0, root->amaf.playouts, 0.0);
115 /* We rely on the fact that root->children is set only
116 * after all children are created. */
117 for (struct tree_node *ni = root->children; ni; ni = ni->sibling) {
119 if (is_pass(ni->coord)) continue;
120 struct node_stats *ns = &u->stats[ni->coord];
121 ns->last_sent_own.u.playouts = ns->last_sent_own.amaf.playouts = 0;
122 ns->node = ni;
123 if (ni->u.playouts <= min_playouts || ni->hints & TREE_HINT_INVALID)
124 continue;
126 char *coord = coord2sstr(ni->coord, b);
127 /* We return the values as stored in the tree, so from black's view.
128 * own = total_in_tree - added_from_others */
129 struct move_stats2 s = { .u = ni->u, .amaf = ni->amaf };
130 struct move_stats2 others = ns->added_from_others;
131 if (s.u.playouts - others.u.playouts <= min_playouts)
132 continue;
133 if (others.u.playouts)
134 stats_rm_result(&s.u, others.u.value, others.u.playouts);
135 if (others.amaf.playouts)
136 stats_rm_result(&s.amaf, others.amaf.value, others.amaf.playouts);
138 r += snprintf(r, end - r, "\n%s %d %.7f %d %.7f", coord,
139 s.u.playouts, s.u.value, s.amaf.playouts, s.amaf.value);
140 ns->last_sent_own = s;
141 /* If the master discards these values because this slave
142 * is out of sync, u->stats will be reset anyway. */
144 return reply;
147 /* How long to wait in slave for initial stats to build up before
148 * replying to the genmoves command (in seconds) */
149 #define MIN_STATS_INTERVAL 0.05 /* 50ms */
151 /* genmoves is issued by the distributed engine master to all slaves, to:
152 * 1. Start a MCTS search if not running yet
153 * 2. Report current move statistics of the on-going search.
154 * The MCTS search is left running on the background when uct_genmoves()
155 * returns. It is stopped by receiving a play GTP command, triggering
156 * uct_pondering_stop(). */
157 /* genmoves gets in the args parameter
158 * "played_games nodes main_time byoyomi_time byoyomi_periods byoyomi_stones @size"
159 * and reads a binary array of coord, playouts, value to get stats of other slaves,
160 * except possibly for the first call at a given move number.
161 * See report_stats() for the description of the return value. */
162 char *
163 uct_genmoves(struct engine *e, struct board *b, struct time_info *ti, enum stone color,
164 char *args, bool pass_all_alive)
166 struct uct *u = e->data;
167 assert(u->slave);
169 /* Prepare the state if the search is not already running.
170 * We must do this first since we tweak the state below
171 * based on instructions from the master. */
172 if (!thread_manager_running)
173 uct_genmove_setup(u, b, color);
175 /* Get playouts and time information from master. Keep this code
176 * in sync with distibuted/distributed.c:distributed_genmove(). */
177 if ((ti->dim == TD_WALLTIME
178 && sscanf(args, "%d %lf %lf %d %d", &u->played_all,
179 &ti->len.t.main_time, &ti->len.t.byoyomi_time,
180 &ti->len.t.byoyomi_periods, &ti->len.t.byoyomi_stones) != 5)
182 || (ti->dim == TD_GAMES && sscanf(args, "%d", &u->played_all) != 1)) {
183 return NULL;
186 static struct uct_search_state s;
187 if (!thread_manager_running) {
188 /* This is the first genmoves issue, start the MCTS
189 * now and let it run while we receive stats. */
190 memset(&s, 0, sizeof(s));
191 uct_search_start(u, b, color, u->t, ti, &s);
194 /* Read binary incremental stats if present, otherwise
195 * wait a bit to populate the statistics. */
196 int size = 0;
197 char *sizep = strchr(args, '@');
198 if (sizep) size = atoi(sizep+1);
199 if (!size) {
200 time_sleep(MIN_STATS_INTERVAL);
201 } else if (!receive_stats(u, size)) {
202 return NULL;
205 /* Check the state of the Monte Carlo Tree Search. */
207 int played_games = uct_search_games(&s);
208 uct_search_progress(u, b, color, u->t, ti, &s, played_games);
209 u->played_own = played_games - s.base_playouts;
211 bool keep_looking = !uct_search_check_stop(u, b, color, u->t, ti, &s, played_games);
212 coord_t best_coord;
213 uct_search_result(u, b, color, pass_all_alive, played_games, s.base_playouts, &best_coord);
215 char *reply = report_stats(u, b, best_coord, keep_looking);
216 return reply;