Distributed engine: Define board_bits2 and path2sstr
[pachi.git] / distributed / distributed.c
blob215bcc7b013128059bceb28e8f196533103a67cb
1 /* This is a master for the "distributed" engine. It receives connections
2 * from slave machines, sends them gtp commands, then aggregates the
3 * results. It can also act as a proxy for the logs of all slave machines.
4 * The slave machines must run with engine "uct" (not "distributed").
5 * The master sends pachi-genmoves gtp commands regularly to each slave,
6 * gets as replies a list of candidate moves, their number of playouts
7 * and their value. The master then picks the most popular move. */
9 /* With time control, the master waits for all slaves, except
10 * when the allowed time is already passed. In this case the
11 * master picks among the available replies, or waits for just
12 * one reply if there is none yet.
13 * Without time control, the master waits until the desired
14 * number of games have been simulated. In this case the -t
15 * parameter for the master should be the sum of the parameters
16 * for all slaves. */
18 /* The master sends updated statistics for the best moves
19 * in each genmoves command. In this version only the
20 * children of the root node are updated. The slaves
21 * reply with just their own stats; they remember what was
22 * previously received from or sent to the master, to
23 * distinguish their own contribution from that of other slaves. */
25 /* The master-slave protocol has has fault tolerance. If a slave is
26 * out of sync, the master sends it the appropriate command history. */
28 /* Pass me arguments like a=b,c=d,...
29 * Supported arguments:
30 * slave_port=SLAVE_PORT slaves connect to this port; this parameter is mandatory.
31 * max_slaves=MAX_SLAVES default 100
32 * slaves_quit=0|1 quit gtp command also sent to slaves, default false.
33 * proxy_port=PROXY_PORT slaves optionally send their logs to this port.
34 * Warning: with proxy_port, the master stderr mixes the logs of all
35 * machines but you can separate them again:
36 * slave logs: sed -n '/< .*:/s/.*< /< /p' logfile
37 * master logs: perl -0777 -pe 's/<[ <].*:.*\n//g' logfile
40 /* A configuration without proxy would have one master run on masterhost as:
41 * zzgo -e distributed slave_port=1234
42 * and N slaves running as:
43 * zzgo -e uct -g masterhost:1234 slave
44 * With log proxy:
45 * zzgo -e distributed slave_port=1234,proxy_port=1235
46 * zzgo -e uct -g masterhost:1234 -l masterhost:1235 slave
47 * If the master itself runs on a machine other than that running gogui,
48 * gogui-twogtp, kgsGtp or cgosGtp, it can redirect its gtp port:
49 * zzgo -e distributed -g 10000 slave_port=1234,proxy_port=1235
52 #include <assert.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <limits.h>
57 #include <time.h>
58 #include <alloca.h>
59 #include <sys/types.h>
61 #define DEBUG
63 #include "engine.h"
64 #include "move.h"
65 #include "timeinfo.h"
66 #include "playout.h"
67 #include "stats.h"
68 #include "mq.h"
69 #include "debug.h"
70 #include "distributed/distributed.h"
71 #include "distributed/protocol.h"
73 /* Internal engine state. */
74 struct distributed {
75 char *slave_port;
76 char *proxy_port;
77 int max_slaves;
78 bool slaves_quit;
79 struct move my_last_move;
80 struct move_stats my_last_stats;
83 /* Default number of simulations to perform per move.
84 * Note that this is in total over all slaves! */
85 #define DIST_GAMES 80000
86 static const struct time_info default_ti = {
87 .period = TT_MOVE,
88 .dim = TD_GAMES,
89 .len = { .games = DIST_GAMES },
92 #define get_value(value, color) \
93 ((color) == S_BLACK ? (value) : 1 - (value))
96 /* Maximum time (seconds) to wait for answers to fast gtp commands
97 * (all commands except pachi-genmoves and final_status_list). */
98 #define MAX_FAST_CMD_WAIT 1.0
100 /* Maximum time (seconds) to wait for answers to genmoves. */
101 #define MAX_GENMOVES_WAIT 0.1 /* 100 ms */
104 /* Display a path as leaf<parent<grandparent...
105 * Returns the path string in a static buffer; it is NOT safe for
106 * anything but debugging - in particular, it is NOT thread-safe! */
107 char *
108 path2sstr(path_t path, struct board *b)
110 /* Special case for pass and resign. */
111 if (path < 0) return coord2sstr((coord_t)path, b);
113 static char buf[16][64];
114 static int bi = 0;
115 char *b2;
116 b2 = buf[bi++ & 15];
117 *b2 = '\0';
118 char *s = b2;
119 char *end = b2 + 64;
120 coord_t leaf;
121 while ((leaf = leaf_coord(path, b)) != 0) {
122 s += snprintf(s, end - s, "%s<", coord2sstr(leaf, b));
123 path = parent_path(path, b);
125 if (s != b2) s[-1] = '\0';
126 return b2;
129 /* Dispatch a new gtp command to all slaves.
130 * The slave lock must not be held upon entry and is released upon return.
131 * args is empty or ends with '\n' */
132 static enum parse_code
133 distributed_notify(struct engine *e, struct board *b, int id, char *cmd, char *args, char **reply)
135 struct distributed *dist = e->data;
137 /* Commands that should not be sent to slaves.
138 * time_left will be part of next pachi-genmoves,
139 * we reduce latency by not forwarding it here. */
140 if ((!strcasecmp(cmd, "quit") && !dist->slaves_quit)
141 || !strcasecmp(cmd, "uct_genbook")
142 || !strcasecmp(cmd, "uct_dumpbook")
143 || !strcasecmp(cmd, "kgs-chat")
144 || !strcasecmp(cmd, "time_left")
146 /* and commands that will be sent to slaves later */
147 || !strcasecmp(cmd, "genmove")
148 || !strcasecmp(cmd, "kgs-genmove_cleanup")
149 || !strcasecmp(cmd, "final_score")
150 || !strcasecmp(cmd, "final_status_list"))
151 return P_OK;
153 protocol_lock();
155 // Create a new command to be sent by the slave threads.
156 new_cmd(b, cmd, args);
158 /* Wait for replies here. If we don't wait, we run the
159 * risk of getting out of sync with most slaves and
160 * sending command history too frequently. */
161 get_replies(time_now() + MAX_FAST_CMD_WAIT, active_slaves);
163 protocol_unlock();
164 return P_OK;
167 /* genmoves returns a line "=id played_own total_playouts threads keep_looking[ reserved]"
168 * then a list of lines "coord playouts value amaf_playouts amaf_value".
169 * Return the move with most playouts, and additional stats.
170 * Keep this code in sync with uct/slave.c:report_stats().
171 * slave_lock is held on entry and on return. */
172 static coord_t
173 select_best_move(struct board *b, struct move_stats2 *stats, int *played,
174 int *total_playouts, int *total_threads, bool *keep_looking)
176 assert(reply_count > 0);
178 /* +2 for pass and resign */
179 memset(stats-2, 0, (board_size2(b)+2) * sizeof(*stats));
181 coord_t best_move = pass;
182 int best_playouts = -1;
183 *played = 0;
184 *total_playouts = 0;
185 *total_threads = 0;
186 int keep = 0;
188 for (int reply = 0; reply < reply_count; reply++) {
189 char *r = gtp_replies[reply];
190 int id, o, p, t, k;
191 if (sscanf(r, "=%d %d %d %d %d", &id, &o, &p, &t, &k) != 5) continue;
192 *played += o;
193 *total_playouts += p;
194 *total_threads += t;
195 keep += k;
196 // Skip the rest of the firt line if any (allow future extensions)
197 r = strchr(r, '\n');
199 char move[64];
200 struct move_stats2 s;
201 while (r && sscanf(++r, "%63s %d %f %d %f", move, &s.u.playouts,
202 &s.u.value, &s.amaf.playouts, &s.amaf.value) == 5) {
203 coord_t *c = str2coord(move, board_size(b));
204 stats_add_result(&stats[*c].u, s.u.value, s.u.playouts);
205 stats_add_result(&stats[*c].amaf, s.amaf.value, s.amaf.playouts);
207 if (stats[*c].u.playouts > best_playouts) {
208 best_playouts = stats[*c].u.playouts;
209 best_move = *c;
211 coord_done(c);
212 r = strchr(r, '\n');
215 *keep_looking = keep > reply_count / 2;
216 return best_move;
219 /* Set the args for the genmoves command. If stats is not null,
220 * append the stats from all slaves above min_playouts, except
221 * for pass and resign. args must have CMDS_SIZE bytes and
222 * upon return ends with an empty line.
223 * Keep this code in sync with uct_genmoves().
224 * slave_lock is held on entry and on return. */
225 static void
226 genmoves_args(char *args, struct board *b, enum stone color, int played,
227 struct time_info *ti, struct move_stats2 *stats, int min_playouts)
229 char *end = args + CMDS_SIZE;
230 char *s = args + snprintf(args, CMDS_SIZE, "%s %d", stone2str(color), played);
232 if (ti->dim == TD_WALLTIME) {
233 s += snprintf(s, end - s, " %.3f %.3f %d %d",
234 ti->len.t.main_time, ti->len.t.byoyomi_time,
235 ti->len.t.byoyomi_periods, ti->len.t.byoyomi_stones);
237 s += snprintf(s, end - s, "\n");
238 if (stats) {
239 foreach_point(b) {
240 if (stats[c].u.playouts <= min_playouts) continue;
241 s += snprintf(s, end - s, "%s %d %.7f %d %.7f\n",
242 coord2sstr(c, b),
243 stats[c].u.playouts, stats[c].u.value,
244 stats[c].amaf.playouts, stats[c].amaf.value);
245 } foreach_point_end;
247 s += snprintf(s, end - s, "\n");
250 /* Time control is mostly done by the slaves, so we use default values here. */
251 #define FUSEKI_END 20
252 #define YOSE_START 40
254 static coord_t *
255 distributed_genmove(struct engine *e, struct board *b, struct time_info *ti,
256 enum stone color, bool pass_all_alive)
258 struct distributed *dist = e->data;
259 double now = time_now();
260 double first = now;
262 char *cmd = pass_all_alive ? "pachi-genmoves_cleanup" : "pachi-genmoves";
263 char args[CMDS_SIZE];
265 coord_t best;
266 int played, playouts, threads;
268 if (ti->period == TT_NULL) *ti = default_ti;
269 struct time_stop stop;
270 time_stop_conditions(ti, b, FUSEKI_END, YOSE_START, &stop);
271 struct time_info saved_ti = *ti;
273 /* Send the first genmoves without stats. */
274 genmoves_args(args, b, color, 0, ti, NULL, 0);
276 /* Combined move stats from all slaves, only for children
277 * of the root node, plus 2 for pass and resign. */
278 struct move_stats2 *stats = alloca((board_size2(b)+2) * sizeof(struct move_stats2));
279 stats += 2;
281 protocol_lock();
282 new_cmd(b, cmd, args);
284 /* Loop until most slaves want to quit or time elapsed. */
285 for (;;) {
286 double start = now;
287 /* Wait for just one slave to get stats as fresh as possible,
288 * or at most 100ms to check if we run out of time. */
289 get_replies(now + MAX_GENMOVES_WAIT, 1);
290 now = time_now();
291 if (ti->dim == TD_WALLTIME)
292 time_sub(ti, now - start, false);
294 bool keep_looking;
295 best = select_best_move(b, stats, &played, &playouts, &threads, &keep_looking);
297 if (!keep_looking) break;
298 if (ti->dim == TD_WALLTIME) {
299 if (now - ti->len.t.timer_start >= stop.worst.time) break;
300 } else {
301 if (played >= stop.worst.playouts) break;
303 if (DEBUGL(2)) {
304 char buf[BSIZE];
305 char *coord = coord2sstr(best, b);
306 snprintf(buf, sizeof(buf),
307 "temp winner is %s %s with score %1.4f (%d/%d games)"
308 " %d slaves %d threads\n",
309 stone2str(color), coord, get_value(stats[best].u.value, color),
310 stats[best].u.playouts, playouts, reply_count, threads);
311 logline(NULL, "* ", buf);
313 /* Send the command with the same gtp id, to avoid discarding
314 * a reply to a previous genmoves at the same move. */
315 /* Do not send ascii stats, slave now expects binary args. */
316 genmoves_args(args, b, color, played, ti, NULL, 0);
317 update_cmd(b, cmd, args, false);
319 int replies = reply_count;
321 /* Do not subtract time spent twice (see gtp_parse). */
322 *ti = saved_ti;
324 dist->my_last_move.color = color;
325 dist->my_last_move.coord = best;
326 dist->my_last_stats = stats[best].u;
328 /* Tell the slaves to commit to the selected move, overwriting
329 * the last "pachi-genmoves" in the command history. */
330 char *coord = coord2str(best, b);
331 snprintf(args, sizeof(args), "%s %s\n", stone2str(color), coord);
332 update_cmd(b, "play", args, true);
333 protocol_unlock();
335 if (DEBUGL(1)) {
336 char buf[BSIZE];
337 double time = now - first + 0.000001; /* avoid divide by zero */
338 snprintf(buf, sizeof(buf),
339 "GLOBAL WINNER is %s %s with score %1.4f (%d/%d games)\n"
340 "genmove %d games in %0.2fs %d slaves %d threads (%d games/s,"
341 " %d games/s/slave, %d games/s/thread)\n",
342 stone2str(color), coord, get_value(stats[best].u.value, color),
343 stats[best].u.playouts, playouts, played, time, replies, threads,
344 (int)(played/time), (int)(played/time/replies),
345 (int)(played/time/threads));
346 logline(NULL, "* ", buf);
348 free(coord);
349 return coord_copy(best);
352 static char *
353 distributed_chat(struct engine *e, struct board *b, char *cmd)
355 struct distributed *dist = e->data;
356 static char reply[BSIZE];
358 cmd += strspn(cmd, " \n\t");
359 if (!strncasecmp(cmd, "winrate", 7)) {
360 enum stone color = dist->my_last_move.color;
361 snprintf(reply, BSIZE, "In %d playouts at %d machines, %s %s can win with %.2f%% probability.",
362 dist->my_last_stats.playouts, active_slaves, stone2str(color),
363 coord2sstr(dist->my_last_move.coord, b),
364 100 * get_value(dist->my_last_stats.value, color));
365 return reply;
367 return NULL;
370 static int
371 scmp(const void *p1, const void *p2)
373 return strcasecmp(*(char * const *)p1, *(char * const *)p2);
376 static void
377 distributed_dead_group_list(struct engine *e, struct board *b, struct move_queue *mq)
379 protocol_lock();
381 new_cmd(b, "final_status_list", "dead\n");
382 get_replies(time_now() + MAX_FAST_CMD_WAIT, active_slaves);
384 /* Find the most popular reply. */
385 qsort(gtp_replies, reply_count, sizeof(char *), scmp);
386 int best_reply = 0;
387 int best_count = 1;
388 int count = 1;
389 for (int reply = 1; reply < reply_count; reply++) {
390 if (!strcmp(gtp_replies[reply], gtp_replies[reply-1])) {
391 count++;
392 } else {
393 count = 1;
395 if (count > best_count) {
396 best_count = count;
397 best_reply = reply;
401 /* Pick the first move of each line as group. */
402 char *dead = gtp_replies[best_reply];
403 dead = strchr(dead, ' '); // skip "id "
404 while (dead && *++dead != '\n') {
405 coord_t *c = str2coord(dead, board_size(b));
406 mq_add(mq, *c);
407 coord_done(c);
408 dead = strchr(dead, '\n');
410 protocol_unlock();
413 static struct distributed *
414 distributed_state_init(char *arg, struct board *b)
416 struct distributed *dist = calloc2(1, sizeof(struct distributed));
418 dist->max_slaves = 100;
419 if (arg) {
420 char *optspec, *next = arg;
421 while (*next) {
422 optspec = next;
423 next += strcspn(next, ",");
424 if (*next) { *next++ = 0; } else { *next = 0; }
426 char *optname = optspec;
427 char *optval = strchr(optspec, '=');
428 if (optval) *optval++ = 0;
430 if (!strcasecmp(optname, "slave_port") && optval) {
431 dist->slave_port = strdup(optval);
432 } else if (!strcasecmp(optname, "proxy_port") && optval) {
433 dist->proxy_port = strdup(optval);
434 } else if (!strcasecmp(optname, "max_slaves") && optval) {
435 dist->max_slaves = atoi(optval);
436 } else if (!strcasecmp(optname, "slaves_quit")) {
437 dist->slaves_quit = !optval || atoi(optval);
438 } else {
439 fprintf(stderr, "distributed: Invalid engine argument %s or missing value\n", optname);
444 gtp_replies = calloc2(dist->max_slaves, sizeof(char *));
446 if (!dist->slave_port) {
447 fprintf(stderr, "distributed: missing slave_port\n");
448 exit(1);
450 protocol_init(dist->slave_port, dist->proxy_port, dist->max_slaves);
451 return dist;
454 struct engine *
455 engine_distributed_init(char *arg, struct board *b)
457 struct distributed *dist = distributed_state_init(arg, b);
458 struct engine *e = calloc2(1, sizeof(struct engine));
459 e->name = "Distributed Engine";
460 e->comment = "I'm playing the distributed engine. When I'm losing, I will resign, "
461 "if I think I win, I play until you pass. "
462 "Anyone can send me 'winrate' in private chat to get my assessment of the position.";
463 e->notify = distributed_notify;
464 e->genmove = distributed_genmove;
465 e->dead_group_list = distributed_dead_group_list;
466 e->chat = distributed_chat;
467 e->data = dist;
468 // Keep the threads and the open socket connections:
469 e->keep_on_clear = true;
471 return e;