/* distributed/distributed.c (pachi.git)
 * Commit: "Distributed engine: Defined shared_nodes, initialize default slave state" */
1 /* This is a master for the "distributed" engine. It receives connections
2 * from slave machines, sends them gtp commands, then aggregates the
3 * results. It can also act as a proxy for the logs of all slave machines.
4 * The slave machines must run with engine "uct" (not "distributed").
5 * The master sends pachi-genmoves gtp commands regularly to each slave,
6 * gets as replies a list of nodes, their number of playouts
7 * and their value. The master then picks the most popular move
8 * among the top level nodes. */
10 /* With time control, the master waits for all slaves, except
11 * when the allowed time is already passed. In this case the
12 * master picks among the available replies, or waits for just
13 * one reply if there is none yet.
14 * Without time control, the master waits until the desired
15 * number of games have been simulated. In this case the -t
16 * parameter for the master should be the sum of the parameters
17 * for all slaves. */
19 /* The master sends updated statistics for the best nodes in each
20 * genmoves command. They are incremental updates from all other
21 * slaves (so they exclude contributions from the target slave).
22 * The slaves reply with just their own stats. So both master and
23 * slave remember what was previously sent. A slave remembers in
24 * the tree ("pu" field), which is stable across moves. The slave
25 * also has a temporary hash table to map received coord paths
26 * to tree nodes; the hash table is cleared at each new move.
27 * The master remembers stats in a queue of received buffers that
28 * are merged together, plus one hash table per slave. The master
29 * queue and the hash tables are cleared at each new move. */
30 /* This version only has the slave receiving part, the rest
31 * comes in subsequent commits. */
33 /* To allow the master to select the best move, slaves also send
34 * absolute playout counts for the best top level nodes (children
35 * of the root node), including contributions from other slaves.
36 * The master sums these counts and picks the best sum, which is
37 * equivalent to picking the best average. (The master cannot
38 * use the incremental stats sent in binary form because they
39 * are not maintained across moves, so playouts from previous
40 * moves would be lost.) */
42 /* The master-slave protocol has fault tolerance. If a slave is
43 * out of sync, the master sends it the appropriate command history. */
45 /* Pass me arguments like a=b,c=d,...
46 * Supported arguments:
47 * slave_port=SLAVE_PORT slaves connect to this port; this parameter is mandatory.
48 * max_slaves=MAX_SLAVES default 24
49 * shared_nodes=SHARED_NODES default 10K
50 * slaves_quit=0|1 quit gtp command also sent to slaves, default false.
51 * proxy_port=PROXY_PORT slaves optionally send their logs to this port.
52 * Warning: with proxy_port, the master stderr mixes the logs of all
53 * machines but you can separate them again:
54 * slave logs: sed -n '/< .*:/s/.*< /< /p' logfile
 * master logs: perl -0777 -pe 's/<[ <].*:.*\n//g' logfile
 */
58 /* A configuration without proxy would have one master run on masterhost as:
59 * zzgo -e distributed slave_port=1234
60 * and N slaves running as:
61 * zzgo -e uct -g masterhost:1234 slave
62 * With log proxy:
63 * zzgo -e distributed slave_port=1234,proxy_port=1235
64 * zzgo -e uct -g masterhost:1234 -l masterhost:1235 slave
65 * If the master itself runs on a machine other than that running gogui,
66 * gogui-twogtp, kgsGtp or cgosGtp, it can redirect its gtp port:
 * zzgo -e distributed -g 10000 slave_port=1234,proxy_port=1235
 */
70 #include <assert.h>
71 #include <stdio.h>
72 #include <stdlib.h>
73 #include <string.h>
74 #include <limits.h>
75 #include <time.h>
76 #include <alloca.h>
77 #include <sys/types.h>
79 #define DEBUG
81 #include "engine.h"
82 #include "move.h"
83 #include "timeinfo.h"
84 #include "playout.h"
85 #include "stats.h"
86 #include "mq.h"
87 #include "debug.h"
88 #include "distributed/distributed.h"
89 #include "distributed/protocol.h"
91 /* Internal engine state. */
92 struct distributed {
93 char *slave_port;
94 char *proxy_port;
95 int max_slaves;
96 int shared_nodes;
97 bool slaves_quit;
98 struct move my_last_move;
99 struct move_stats my_last_stats;
102 /* Default number of simulations to perform per move.
103 * Note that this is in total over all slaves! */
104 #define DIST_GAMES 80000
105 static const struct time_info default_ti = {
106 .period = TT_MOVE,
107 .dim = TD_GAMES,
108 .len = { .games = DIST_GAMES },
/* Values are kept from black's point of view; convert to the given
 * color's point of view (presumably a winrate in [0,1] — the macro
 * mirrors it as 1 - value for white). */
#define get_value(value, color) \
	((color) == S_BLACK ? (value) : 1 - (value))
/* Maximum time (seconds) to wait for answers to fast gtp commands
 * (all commands except pachi-genmoves and final_status_list). */
#define MAX_FAST_CMD_WAIT 1.0

/* Maximum time (seconds) to wait for answers to genmoves. */
#define MAX_GENMOVES_WAIT 0.1 /* 100 ms */
123 /* Display a path as leaf<parent<grandparent...
124 * Returns the path string in a static buffer; it is NOT safe for
125 * anything but debugging - in particular, it is NOT thread-safe! */
126 char *
127 path2sstr(path_t path, struct board *b)
129 /* Special case for pass and resign. */
130 if (path < 0) return coord2sstr((coord_t)path, b);
132 static char buf[16][64];
133 static int bi = 0;
134 char *b2;
135 b2 = buf[bi++ & 15];
136 *b2 = '\0';
137 char *s = b2;
138 char *end = b2 + 64;
139 coord_t leaf;
140 while ((leaf = leaf_coord(path, b)) != 0) {
141 s += snprintf(s, end - s, "%s<", coord2sstr(leaf, b));
142 path = parent_path(path, b);
144 if (s != b2) s[-1] = '\0';
145 return b2;
148 /* Dispatch a new gtp command to all slaves.
149 * The slave lock must not be held upon entry and is released upon return.
150 * args is empty or ends with '\n' */
151 static enum parse_code
152 distributed_notify(struct engine *e, struct board *b, int id, char *cmd, char *args, char **reply)
154 struct distributed *dist = e->data;
156 /* Commands that should not be sent to slaves.
157 * time_left will be part of next pachi-genmoves,
158 * we reduce latency by not forwarding it here. */
159 if ((!strcasecmp(cmd, "quit") && !dist->slaves_quit)
160 || !strcasecmp(cmd, "uct_genbook")
161 || !strcasecmp(cmd, "uct_dumpbook")
162 || !strcasecmp(cmd, "kgs-chat")
163 || !strcasecmp(cmd, "time_left")
165 /* and commands that will be sent to slaves later */
166 || !strcasecmp(cmd, "genmove")
167 || !strcasecmp(cmd, "kgs-genmove_cleanup")
168 || !strcasecmp(cmd, "final_score")
169 || !strcasecmp(cmd, "final_status_list"))
170 return P_OK;
172 protocol_lock();
174 // Create a new command to be sent by the slave threads.
175 new_cmd(b, cmd, args);
177 /* Wait for replies here. If we don't wait, we run the
178 * risk of getting out of sync with most slaves and
179 * sending command history too frequently. */
180 get_replies(time_now() + MAX_FAST_CMD_WAIT, active_slaves);
182 protocol_unlock();
183 return P_OK;
186 /* genmoves returns a line "=id played_own total_playouts threads keep_looking[ reserved]"
187 * then a list of lines "coord playouts value" with absolute counts for
188 * children of the root node.
189 * Return the move with most playouts, and additional stats.
190 * Keep this code in sync with uct/slave.c:report_stats().
191 * slave_lock is held on entry and on return. */
192 static coord_t
193 select_best_move(struct board *b, struct move_stats *stats, int *played,
194 int *total_playouts, int *total_threads, bool *keep_looking)
196 assert(reply_count > 0);
198 /* +2 for pass and resign */
199 memset(stats-2, 0, (board_size2(b)+2) * sizeof(*stats));
201 coord_t best_move = pass;
202 int best_playouts = -1;
203 *played = 0;
204 *total_playouts = 0;
205 *total_threads = 0;
206 int keep = 0;
208 for (int reply = 0; reply < reply_count; reply++) {
209 char *r = gtp_replies[reply];
210 int id, o, p, t, k;
211 if (sscanf(r, "=%d %d %d %d %d", &id, &o, &p, &t, &k) != 5) continue;
212 *played += o;
213 *total_playouts += p;
214 *total_threads += t;
215 keep += k;
216 // Skip the rest of the firt line if any (allow future extensions)
217 r = strchr(r, '\n');
219 char move[64];
220 struct move_stats s;
221 while (r && sscanf(++r, "%63s %d %f", move, &s.playouts, &s.value) == 3) {
222 coord_t c = str2scoord(move, board_size(b));
223 assert (c >= resign && c < board_size2(b) && s.playouts >= 0);
225 stats_add_result(&stats[c], s.value, s.playouts);
227 if (stats[c].playouts > best_playouts) {
228 best_playouts = stats[c].playouts;
229 best_move = c;
231 r = strchr(r, '\n');
234 *keep_looking = keep > reply_count / 2;
235 return best_move;
238 /* Set the args for the genmoves command. If stats is not null,
239 * append the stats from all slaves above min_playouts, except
240 * for pass and resign. args must have CMDS_SIZE bytes and
241 * upon return ends with an empty line.
242 * Keep this code in sync with uct_genmoves().
243 * slave_lock is held on entry and on return. */
244 static void
245 genmoves_args(char *args, struct board *b, enum stone color, int played,
246 struct time_info *ti, struct move_stats2 *stats, int min_playouts)
248 char *end = args + CMDS_SIZE;
249 char *s = args + snprintf(args, CMDS_SIZE, "%s %d", stone2str(color), played);
251 if (ti->dim == TD_WALLTIME) {
252 s += snprintf(s, end - s, " %.3f %.3f %d %d",
253 ti->len.t.main_time, ti->len.t.byoyomi_time,
254 ti->len.t.byoyomi_periods, ti->len.t.byoyomi_stones);
256 s += snprintf(s, end - s, "\n");
257 if (stats) {
258 foreach_point(b) {
259 if (stats[c].u.playouts <= min_playouts) continue;
260 s += snprintf(s, end - s, "%s %d %.7f %d %.7f\n",
261 coord2sstr(c, b),
262 stats[c].u.playouts, stats[c].u.value,
263 stats[c].amaf.playouts, stats[c].amaf.value);
264 } foreach_point_end;
266 s += snprintf(s, end - s, "\n");
/* Time control is mostly done by the slaves, so we use default values here. */
#define FUSEKI_END 20
#define YOSE_START 40

/* In the ascii reply to genmoves, each slave sends absolute counts
 * including contributions from other slaves. For human display
 * reduce the sum to an average. */
#define stats_average(playouts) ((playouts) / reply_count)
278 /* Regularly send genmoves command to the slaves, and select the best move. */
279 static coord_t *
280 distributed_genmove(struct engine *e, struct board *b, struct time_info *ti,
281 enum stone color, bool pass_all_alive)
283 struct distributed *dist = e->data;
284 double now = time_now();
285 double first = now;
286 char buf[BSIZE]; // debug only
288 char *cmd = pass_all_alive ? "pachi-genmoves_cleanup" : "pachi-genmoves";
289 char args[CMDS_SIZE];
291 coord_t best;
292 int played, playouts, threads;
294 if (ti->period == TT_NULL) *ti = default_ti;
295 struct time_stop stop;
296 time_stop_conditions(ti, b, FUSEKI_END, YOSE_START, &stop);
297 struct time_info saved_ti = *ti;
299 /* Combined move stats from all slaves, only for children
300 * of the root node, plus 2 for pass and resign. */
301 struct move_stats *stats = alloca((board_size2(b)+2) * sizeof(struct move_stats));
302 stats += 2;
304 protocol_lock();
306 /* Send the first genmoves without stats. */
307 genmoves_args(args, b, color, 0, ti, NULL, 0);
308 new_cmd(b, cmd, args);
310 /* Loop until most slaves want to quit or time elapsed. */
311 int iterations;
312 for (iterations = 1; ; iterations++) {
313 double start = now;
314 /* Wait for just one slave to get stats as fresh as possible,
315 * or at most 100ms to check if we run out of time. */
316 get_replies(now + MAX_GENMOVES_WAIT, 1);
317 now = time_now();
318 if (ti->dim == TD_WALLTIME)
319 time_sub(ti, now - start, false);
321 bool keep_looking;
322 best = select_best_move(b, stats, &played, &playouts, &threads, &keep_looking);
324 if (!keep_looking) break;
325 if (ti->dim == TD_WALLTIME) {
326 if (now - ti->len.t.timer_start >= stop.worst.time) break;
327 } else {
328 if (played >= stop.worst.playouts) break;
330 if (DEBUGVV(2)) {
331 char *coord = coord2sstr(best, b);
332 snprintf(buf, sizeof(buf),
333 "temp winner is %s %s with score %1.4f (%d/%d games)"
334 " %d slaves %d threads\n",
335 stone2str(color), coord, get_value(stats[best].value, color),
336 stats_average(stats[best].playouts), playouts, reply_count, threads);
337 logline(NULL, "* ", buf);
339 /* Send the command with the same gtp id, to avoid discarding
340 * a reply to a previous genmoves at the same move. */
341 /* Do not send ascii stats, slave now expects binary args. */
342 genmoves_args(args, b, color, played, ti, NULL, 0);
343 update_cmd(b, cmd, args, false);
345 int replies = reply_count;
347 /* Do not subtract time spent twice (see gtp_parse). */
348 *ti = saved_ti;
350 dist->my_last_move.color = color;
351 dist->my_last_move.coord = best;
352 dist->my_last_stats.value = stats[best].value;
353 dist->my_last_stats.playouts = stats_average(stats[best].playouts);
355 /* Tell the slaves to commit to the selected move, overwriting
356 * the last "pachi-genmoves" in the command history. */
357 char *coord = coord2str(best, b);
358 snprintf(args, sizeof(args), "%s %s\n", stone2str(color), coord);
359 update_cmd(b, "play", args, true);
360 protocol_unlock();
362 if (DEBUGL(1)) {
363 double time = now - first + 0.000001; /* avoid divide by zero */
364 snprintf(buf, sizeof(buf),
365 "GLOBAL WINNER is %s %s with score %1.4f (%d/%d games)\n"
366 "genmove %d games in %0.2fs %d slaves %d threads (%d games/s,"
367 " %d games/s/slave, %d games/s/thread, %.3f ms/iter)\n",
368 stone2str(color), coord, get_value(stats[best].value, color),
369 stats[best].playouts, playouts, played, time, replies, threads,
370 (int)(played/time), (int)(played/time/replies),
371 (int)(played/time/threads), 1000*time/iterations);
372 logline(NULL, "* ", buf);
374 free(coord);
375 return coord_copy(best);
378 static char *
379 distributed_chat(struct engine *e, struct board *b, char *cmd)
381 struct distributed *dist = e->data;
382 static char reply[BSIZE];
384 cmd += strspn(cmd, " \n\t");
385 if (!strncasecmp(cmd, "winrate", 7)) {
386 enum stone color = dist->my_last_move.color;
387 snprintf(reply, BSIZE, "In %d playouts at %d machines, %s %s can win with %.2f%% probability.",
388 dist->my_last_stats.playouts, active_slaves, stone2str(color),
389 coord2sstr(dist->my_last_move.coord, b),
390 100 * get_value(dist->my_last_stats.value, color));
391 return reply;
393 return NULL;
/* qsort comparator: case-insensitive ordering of C strings, given
 * pointers to the (char *) elements being sorted. */
static int
scmp(const void *p1, const void *p2)
{
	return strcasecmp(*(char * const *)p1, *(char * const *)p2);
}
402 static void
403 distributed_dead_group_list(struct engine *e, struct board *b, struct move_queue *mq)
405 protocol_lock();
407 new_cmd(b, "final_status_list", "dead\n");
408 get_replies(time_now() + MAX_FAST_CMD_WAIT, active_slaves);
410 /* Find the most popular reply. */
411 qsort(gtp_replies, reply_count, sizeof(char *), scmp);
412 int best_reply = 0;
413 int best_count = 1;
414 int count = 1;
415 for (int reply = 1; reply < reply_count; reply++) {
416 if (!strcmp(gtp_replies[reply], gtp_replies[reply-1])) {
417 count++;
418 } else {
419 count = 1;
421 if (count > best_count) {
422 best_count = count;
423 best_reply = reply;
427 /* Pick the first move of each line as group. */
428 char *dead = gtp_replies[best_reply];
429 dead = strchr(dead, ' '); // skip "id "
430 while (dead && *++dead != '\n') {
431 coord_t *c = str2coord(dead, board_size(b));
432 mq_add(mq, *c);
433 coord_done(c);
434 dead = strchr(dead, '\n');
436 protocol_unlock();
439 static struct distributed *
440 distributed_state_init(char *arg, struct board *b)
442 struct distributed *dist = calloc2(1, sizeof(struct distributed));
444 dist->max_slaves = DEFAULT_MAX_SLAVES;
445 dist->shared_nodes = DEFAULT_SHARED_NODES;
446 if (arg) {
447 char *optspec, *next = arg;
448 while (*next) {
449 optspec = next;
450 next += strcspn(next, ",");
451 if (*next) { *next++ = 0; } else { *next = 0; }
453 char *optname = optspec;
454 char *optval = strchr(optspec, '=');
455 if (optval) *optval++ = 0;
457 if (!strcasecmp(optname, "slave_port") && optval) {
458 dist->slave_port = strdup(optval);
459 } else if (!strcasecmp(optname, "proxy_port") && optval) {
460 dist->proxy_port = strdup(optval);
461 } else if (!strcasecmp(optname, "max_slaves") && optval) {
462 dist->max_slaves = atoi(optval);
463 } else if (!strcasecmp(optname, "shared_nodes") && optval) {
464 /* Share at most shared_nodes between master and slave at each genmoves.
465 * Must use the same value in master and slaves. */
466 dist->shared_nodes = atoi(optval);
467 } else if (!strcasecmp(optname, "slaves_quit")) {
468 dist->slaves_quit = !optval || atoi(optval);
469 } else {
470 fprintf(stderr, "distributed: Invalid engine argument %s or missing value\n", optname);
475 gtp_replies = calloc2(dist->max_slaves, sizeof(char *));
477 if (!dist->slave_port) {
478 fprintf(stderr, "distributed: missing slave_port\n");
479 exit(1);
481 protocol_init(dist->slave_port, dist->proxy_port, dist->max_slaves, dist->shared_nodes);
482 return dist;
485 struct engine *
486 engine_distributed_init(char *arg, struct board *b)
488 struct distributed *dist = distributed_state_init(arg, b);
489 struct engine *e = calloc2(1, sizeof(struct engine));
490 e->name = "Distributed Engine";
491 e->comment = "I'm playing the distributed engine. When I'm losing, I will resign, "
492 "if I think I win, I play until you pass. "
493 "Anyone can send me 'winrate' in private chat to get my assessment of the position.";
494 e->notify = distributed_notify;
495 e->genmove = distributed_genmove;
496 e->dead_group_list = distributed_dead_group_list;
497 e->chat = distributed_chat;
498 e->data = dist;
499 // Keep the threads and the open socket connections:
500 e->keep_on_clear = true;
502 return e;