Distributed engine: slaves keep thinking after genmoves command,
[pachi/pachi-r6144.git] / distributed / distributed.c
blobd773fea421db024901b8bdca2486d919c4f752f3
1 /* This is a master for the "distributed" engine. It receives connections
2 * from slave machines, sends them gtp commands, then aggregates the
3 * results. It can also act as a proxy for the logs of all slave machines.
4 * The slave machines must run with engine "uct" (not "distributed").
5 * The master sends pachi-genmoves gtp commands regularly to each slave,
6 * gets as replies a list of candidate moves, their number of playouts
7 * and their value. The master then picks the most popular move. */
9 /* With time control, the master waits for all slaves, except
10 * when the allowed time is already passed. In this case the
11 * master picks among the available replies, or waits for just
12 * one reply if there is none yet.
13 * Without time control, the master waits until the desired
14 * number of games have been simulated. In this case the -t
15 * parameter for the master should be the sum of the parameters
16 * for all slaves. */
18 /* This first version does not send tree updates between slaves,
19 * but it has fault tolerance. If a slave is out of sync, the master
20 * sends it the appropriate command history. */
/* Pass me arguments like a=b,c=d,...
 * Supported arguments:
 * slave_port=SLAVE_PORT   slaves connect to this port; this parameter is mandatory.
 * max_slaves=MAX_SLAVES   default 100
 * slaves_quit=0|1         quit gtp command also sent to slaves, default false.
 * proxy_port=PROXY_PORT   slaves optionally send their logs to this port.
 *    Warning: with proxy_port, the master stderr mixes the logs of all
 *    machines but you can separate them again:
 *      slave logs:  sed -n '/< .*:/s/.*< /< /p' logfile
 *      master logs: perl -0777 -pe 's/<[ <].*:.*\n//g' logfile
 */
/* A configuration without proxy would have one master run on masterhost as:
 *    zzgo -e distributed slave_port=1234
 * and N slaves running as:
 *    zzgo -e uct -g masterhost:1234 slave
 * With log proxy:
 *    zzgo -e distributed slave_port=1234,proxy_port=1235
 *    zzgo -e uct -g masterhost:1234 -l masterhost:1235 slave
 * If the master itself runs on a machine other than the one running gogui,
 * gogui-twogtp, kgsGtp or cgosGtp, it can redirect its gtp port:
 *    zzgo -e distributed -g 10000 slave_port=1234,proxy_port=1235
 */
46 #include <assert.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <pthread.h>
51 #include <limits.h>
52 #include <ctype.h>
53 #include <time.h>
54 #include <alloca.h>
55 #include <sys/types.h>
56 #include <sys/socket.h>
57 #include <arpa/inet.h>
59 #define DEBUG
61 #include "board.h"
62 #include "engine.h"
63 #include "move.h"
64 #include "timeinfo.h"
65 #include "network.h"
66 #include "playout.h"
67 #include "random.h"
68 #include "stats.h"
69 #include "mq.h"
70 #include "debug.h"
71 #include "distributed/distributed.h"
73 /* Internal engine state. */
74 struct distributed {
75 char *slave_port;
76 char *proxy_port;
77 int max_slaves;
78 bool slaves_quit;
79 struct move my_last_move;
80 struct move_stats my_last_stats;
83 static coord_t select_best_move(struct board *b, struct move_stats *best_stats,
84 int *total_playouts, int *total_threads, bool *keep_looking);
86 /* Default number of simulations to perform per move.
87 * Note that this is in total over all slaves! */
88 #define DIST_GAMES 80000
89 static const struct time_info default_ti = {
90 .period = TT_MOVE,
91 .dim = TD_GAMES,
92 .len = { .games = DIST_GAMES },
95 #define get_value(value, color) \
96 ((color) == S_BLACK ? (value) : 1 - (value))
98 /* Max size for one line of reply or slave log. */
99 #define BSIZE 4096
101 /* Max size of all gtp commands for one game.
102 * 60 chars for the first line of genmoves plus 100 lines
103 * of 30 chars each for the stats at last move. */
104 #define CMDS_SIZE (60*MAX_GAMELEN + 30*100)
106 /* All gtp commands for current game separated by \n */
107 static char gtp_cmds[CMDS_SIZE];
109 /* Latest gtp command sent to slaves. */
110 static char *gtp_cmd = NULL;
112 /* Remember at most 12 gtp ids per move: play pass,
113 * 10 genmoves (1s), play pass.
114 * For move 0 we always resend the whole history. */
115 #define MAX_CMDS_PER_MOVE 12
117 /* History of gtp commands sent for current game, indexed by move. */
118 static int id_history[MAX_GAMELEN][MAX_CMDS_PER_MOVE];
119 static char *cmd_history[MAX_GAMELEN][MAX_CMDS_PER_MOVE];
121 /* Number of active slave machines working for this master. */
122 static int active_slaves = 0;
124 /* Number of replies to last gtp command already received. */
125 static int reply_count = 0;
126 static int final_reply_count = 0;
128 /* All replies to latest gtp command are in gtp_replies[0..reply_count-1]. */
129 static char **gtp_replies;
131 /* Mutex protecting gtp_cmds, gtp_cmd, id_history, cmd_history,
132 * active_slaves, reply_count, final_reply_count & gtp_replies */
133 static pthread_mutex_t slave_lock = PTHREAD_MUTEX_INITIALIZER;
135 /* Condition signaled when a new gtp command is available. */
136 static pthread_cond_t cmd_cond = PTHREAD_COND_INITIALIZER;
138 /* Condition signaled when reply_count increases. */
139 static pthread_cond_t reply_cond = PTHREAD_COND_INITIALIZER;
141 /* Mutex protecting stderr. Must not be held at same time as slave_lock. */
142 static pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;
144 /* Absolute time when this program was started.
145 * For debugging only. */
146 static double start_time;
148 /* Write the time, client address, prefix, and string s to stderr atomically.
149 * s should end with a \n */
150 static void
151 logline(struct in_addr *client, char *prefix, char *s)
153 double now = time_now();
154 char addr[INET_ADDRSTRLEN];
155 if (client) {
156 inet_ntop(AF_INET, client, addr, sizeof(addr));
157 } else {
158 addr[0] = '\0';
160 pthread_mutex_lock(&log_lock);
161 fprintf(stderr, "%s%15s %9.3f: %s", prefix, addr, now - start_time, s);
162 pthread_mutex_unlock(&log_lock);
165 /* Thread opening a connection on the given socket and copying input
166 * from there to stderr. */
167 static void *
168 proxy_thread(void *arg)
170 int proxy_sock = (long)arg;
171 assert(proxy_sock >= 0);
172 for (;;) {
173 struct in_addr client;
174 int conn = open_server_connection(proxy_sock, &client);
175 FILE *f = fdopen(conn, "r");
176 char buf[BSIZE];
177 while (fgets(buf, BSIZE, f)) {
178 logline(&client, "< ", buf);
180 fclose(f);
184 /* Get a reply to one gtp command. If we get a temporary
185 * reply, put it in gtp_replies[reply_slot], notify the main
186 * thread, and continue reading until we get a final reply.
187 * Return the gtp command id, or -1 if error.
188 * slave_buf and reply must have at least CMDS_SIZE bytes.
189 * slave_lock is not held on either entry or exit of this function. */
190 static int
191 get_reply(FILE *f, struct in_addr client, char *slave_buf, char *reply, int *reply_slot)
193 int reply_id = -1;
194 *reply_slot = -1;
195 *reply = '\0';
196 char *line = reply;
197 while (fgets(line, reply + CMDS_SIZE - line, f) && *line != '\n') {
198 if (DEBUGL(2))
199 logline(&client, "<<", line);
200 if (reply_id < 0 && (*line == '=' || *line == '?') && isdigit(line[1]))
201 reply_id = atoi(line+1);
202 if (*line == '#') {
203 /* Temporary reply. */
204 line = reply;
205 pthread_mutex_lock(&slave_lock);
206 if (reply_id != atoi(gtp_cmd)) {
207 pthread_mutex_unlock(&slave_lock);
208 continue; // read and discard the rest
210 strncpy(slave_buf, reply, CMDS_SIZE);
211 if (*reply_slot < 0)
212 *reply_slot = reply_count++;
213 gtp_replies[*reply_slot] = slave_buf;
214 pthread_cond_signal(&reply_cond);
215 pthread_mutex_unlock(&slave_lock);
216 } else {
217 line += strlen(line);
220 if (*line != '\n') return -1;
221 return reply_id;
224 /* Main loop of a slave thread.
225 * Send the current command to the slave machine and wait for a reply.
226 * Resend command history if the slave machine is out of sync.
227 * Returns when the connection with the slave machine is cut.
228 * slave_lock is held on both entry and exit of this function. */
229 static void
230 slave_loop(FILE *f, struct in_addr client, char *buf, bool resend)
232 char *to_send = gtp_cmd;
233 int cmd_id = -1;
234 int reply_id = -1;
235 int reply_slot;
236 for (;;) {
237 while (cmd_id == reply_id && !resend) {
238 // Wait for a new gtp command.
239 pthread_cond_wait(&cmd_cond, &slave_lock);
240 if (gtp_cmd)
241 cmd_id = atoi(gtp_cmd);
242 to_send = gtp_cmd;
245 /* Command available, send it to slave machine.
246 * If slave was out of sync, send the history. */
247 assert(to_send && gtp_cmd);
248 strncpy(buf, to_send, CMDS_SIZE);
249 cmd_id = atoi(gtp_cmd);
251 pthread_mutex_unlock(&slave_lock);
253 if (DEBUGL(1) && resend) {
254 if (to_send == gtp_cmds) {
255 logline(&client, "? ", "Slave out-of-sync, resending all history\n");
256 } else {
257 logline(&client, "? ", "Slave behind, partial resend\n");
260 if (DEBUGL(2))
261 logline(&client, ">>", buf);
262 fputs(buf, f);
263 fflush(f);
265 /* Read the reply, which always ends with \n\n
266 * The slave machine sends "=id reply" or "?id reply"
267 * with id == cmd_id if it is in sync. */
268 char reply[CMDS_SIZE];
269 reply_id = get_reply(f, client, buf, reply, &reply_slot);
271 pthread_mutex_lock(&slave_lock);
272 if (reply_id == -1) return;
274 // Make sure we are still in sync:
275 cmd_id = atoi(gtp_cmd);
276 if (reply_id == cmd_id && *reply == '=') {
277 resend = false;
278 strncpy(buf, reply, CMDS_SIZE);
279 final_reply_count++;
280 if (reply_slot < 0)
281 reply_slot = reply_count++;
282 gtp_replies[reply_slot] = buf;
283 pthread_cond_signal(&reply_cond);
284 continue;
286 resend = true;
287 to_send = gtp_cmds;
288 /* Resend everything if slave got latest command,
289 * but doesn't have a correct board. */
290 if (reply_id == cmd_id) continue;
292 /* The slave is ouf-of-sync. Check whether the last command
293 * it received belongs to the current game. If so resend
294 * starting at the last move known by slave, otherwise
295 * resend the whole history. */
296 int reply_move = move_number(reply_id);
297 if (reply_move > move_number(cmd_id)) continue;
299 for (int slot = 0; slot < MAX_CMDS_PER_MOVE; slot++) {
300 if (reply_id == id_history[reply_move][slot]) {
301 to_send = cmd_history[reply_move][slot];
302 break;
308 /* Thread sending gtp commands to one slave machine, and
309 * reading replies. If a slave machine dies, this thread waits
310 * for a connection from another slave. */
311 static void *
312 slave_thread(void *arg)
314 int slave_sock = (long)arg;
315 assert(slave_sock >= 0);
316 char slave_buf[CMDS_SIZE];
317 bool resend = false;
319 for (;;) {
320 /* Wait for a connection from any slave. */
321 struct in_addr client;
322 int conn = open_server_connection(slave_sock, &client);
324 FILE *f = fdopen(conn, "r+");
325 if (DEBUGL(2))
326 logline(&client, "= ", "new slave\n");
328 /* Minimal check of the slave identity. */
329 fputs("name\n", f);
330 if (!fgets(slave_buf, sizeof(slave_buf), f)
331 || strncasecmp(slave_buf, "= Pachi", 7)
332 || !fgets(slave_buf, sizeof(slave_buf), f)
333 || strcmp(slave_buf, "\n")) {
334 logline(&client, "? ", "bad slave\n");
335 fclose(f);
336 continue;
339 pthread_mutex_lock(&slave_lock);
340 active_slaves++;
341 slave_loop(f, client, slave_buf, resend);
343 assert(active_slaves > 0);
344 active_slaves--;
345 // Unblock main thread if it was waiting for this slave.
346 pthread_cond_signal(&reply_cond);
347 pthread_mutex_unlock(&slave_lock);
349 resend = true;
350 if (DEBUGL(2))
351 logline(&client, "= ", "lost slave\n");
352 fclose(f);
356 /* Create a new gtp command for all slaves. The slave lock is held
357 * upon entry and upon return, so the command will actually be
358 * sent when the lock is released. The last command is overwritten
359 * if gtp_cmd points to a non-empty string. cmd is a single word;
360 * args has all arguments and is empty or has a trailing \n */
361 static void
362 update_cmd(struct board *b, char *cmd, char *args)
364 assert(gtp_cmd);
365 /* To make sure the slaves are in sync, we ignore the original id
366 * and use the board number plus some random bits as gtp id.
367 * Make sure the new command has a new id otherwise slaves
368 * won't send it. */
369 static int gtp_id = -1;
370 int id;
371 int moves = is_reset(cmd) ? 0 : b->moves;
372 do {
373 /* fast_random() is 16-bit only so the multiplication can't overflow. */
374 id = force_reply(moves + fast_random(65535) * DIST_GAMELEN);
375 } while (id == gtp_id);
376 gtp_id = id;
377 snprintf(gtp_cmd, gtp_cmds + CMDS_SIZE - gtp_cmd, "%d %s %s",
378 id, cmd, *args ? args : "\n");
379 reply_count = final_reply_count = 0;
381 /* Remember history for out-of-sync slaves. */
382 static int slot = 0;
383 slot = (slot + 1) % MAX_CMDS_PER_MOVE;
384 id_history[moves][slot] = id;
385 cmd_history[moves][slot] = gtp_cmd;
387 // Notify the slave threads about the new command.
388 pthread_cond_broadcast(&cmd_cond);
391 /* Update the command history, then create a new gtp command
392 * for all slaves. The slave lock is held upon entry and
393 * upon return, so the command will actually be sent when the
394 * lock is released. cmd is a single word; args has all
395 * arguments and is empty or has a trailing \n */
396 static void
397 new_cmd(struct board *b, char *cmd, char *args)
399 // Clear the history when a new game starts:
400 if (!gtp_cmd || is_gamestart(cmd)) {
401 gtp_cmd = gtp_cmds;
402 } else {
403 /* Preserve command history for new slaves.
404 * To indicate that the slave should only reply to
405 * the last command we force the id of previous
406 * commands to be just the move number. */
407 int id = prevent_reply(atoi(gtp_cmd));
408 int len = strspn(gtp_cmd, "0123456789");
409 char buf[32];
410 snprintf(buf, sizeof(buf), "%0*d", len, id);
411 memcpy(gtp_cmd, buf, len);
413 gtp_cmd += strlen(gtp_cmd);
416 // Let the slave threads send the new gtp command:
417 update_cmd(b, cmd, args);
420 /* If time_limit > 0, wait until all slaves have replied, or if the
421 * given absolute time is passed, wait for at least one reply.
422 * If time_limit == 0, wait until we get at least min_playouts games
423 * simulated in total by all the slaves, or until all slaves have replied.
424 * The replies are returned in gtp_replies[0..reply_count-1]
425 * slave_lock is held on entry and on return. */
426 static void
427 get_replies(double time_limit, int min_playouts, struct board *b)
429 while (reply_count == 0 || final_reply_count < active_slaves) {
430 if (time_limit && reply_count > 0) {
431 struct timespec ts;
432 double sec;
433 ts.tv_nsec = (int)(modf(time_limit, &sec)*1000000000.0);
434 ts.tv_sec = (int)sec;
435 pthread_cond_timedwait(&reply_cond, &slave_lock, &ts);
436 } else {
437 pthread_cond_wait(&reply_cond, &slave_lock);
439 if (reply_count == 0) continue;
440 if (final_reply_count >= active_slaves) return;
441 if (time_limit) {
442 if (time_now() >= time_limit) break;
443 } else {
444 int playouts;
445 select_best_move(b, NULL, &playouts, NULL, NULL);
446 if (playouts >= min_playouts) return;
449 if (DEBUGL(1)) {
450 char buf[1024];
451 snprintf(buf, sizeof(buf),
452 "get_replies timeout %.3f >= %.3f, final %d, temp %d, active %d\n",
453 time_now() - start_time, time_limit - start_time,
454 final_reply_count, reply_count, active_slaves);
455 logline(NULL, "? ", buf);
457 assert(reply_count > 0 && final_reply_count <= reply_count);
/* Maximum time (seconds) to wait for the answers to the commands
 * forwarded by distributed_notify() and to final_status_list. */
#define MAX_FAST_CMD_WAIT 1.0

/* Maximum time (seconds) genmove waits for replies before sending
 * an updated pachi-genmoves command to the slaves. */
#define STATS_UPDATE_INTERVAL 0.1 /* 100ms */
471 /* Dispatch a new gtp command to all slaves.
472 * The slave lock must not be held upon entry and is released upon return.
473 * args is empty or ends with '\n' */
474 static enum parse_code
475 distributed_notify(struct engine *e, struct board *b, int id, char *cmd, char *args, char **reply)
477 struct distributed *dist = e->data;
479 /* Commands that should not be sent to slaves.
480 * time_left will be part of next pachi-genmoves,
481 * we reduce latency by not forwarding it here. */
482 if ((!strcasecmp(cmd, "quit") && !dist->slaves_quit)
483 || !strcasecmp(cmd, "uct_genbook")
484 || !strcasecmp(cmd, "uct_dumpbook")
485 || !strcasecmp(cmd, "kgs-chat")
486 || !strcasecmp(cmd, "time_left")
488 /* and commands that will be sent to slaves later */
489 || !strcasecmp(cmd, "genmove")
490 || !strcasecmp(cmd, "kgs-genmove_cleanup")
491 || !strcasecmp(cmd, "final_score")
492 || !strcasecmp(cmd, "final_status_list"))
493 return P_OK;
495 pthread_mutex_lock(&slave_lock);
497 // Create a new command to be sent by the slave threads.
498 new_cmd(b, cmd, args);
500 /* Wait for replies here. If we don't wait, we run the
501 * risk of getting out of sync with most slaves and
502 * sending command history too frequently. */
503 get_replies(time_now() + MAX_FAST_CMD_WAIT, 0, b);
505 pthread_mutex_unlock(&slave_lock);
506 return P_OK;
509 /* genmoves returns a line "=id total_playouts threads keep_looking[ reserved]"
510 * then a list of lines "coord playouts value".
511 * Return the move with most playouts, and optional additional info.
512 * Keep this code in sync with uct_getstats().
513 * slave_lock is held on entry and on return. */
514 static coord_t
515 select_best_move(struct board *b, struct move_stats *best_stats,
516 int *total_playouts, int *total_threads, bool *keep_looking)
518 assert(reply_count > 0);
520 /* +2 for pass and resign. */
521 struct move_stats *stats = alloca((board_size2(b)+2) * sizeof(struct move_stats));
522 memset(stats, 0, (board_size2(b)+2) * sizeof(*stats));
523 stats += 2;
525 coord_t best_move = pass;
526 int best_playouts = -1;
527 int playouts = 0;
528 int threads = 0;
529 int keep = 0;
531 for (int reply = 0; reply < reply_count; reply++) {
532 char *r = gtp_replies[reply];
533 int id, p, t, k;
534 if (sscanf(r, "=%d %d %d %d", &id, &p, &t, &k) != 4) continue;
535 playouts += p;
536 threads += t;
537 keep += k;
538 // Skip the rest of the firt line if any (allow future extensions)
539 r = strchr(r, '\n');
541 char move[64];
542 struct move_stats s;
543 while (r && sscanf(++r, "%63s %d %f", move, &s.playouts, &s.value) == 3) {
544 coord_t *c = str2coord(move, board_size(b));
545 stats_add_result(&stats[*c], s.value, s.playouts);
546 if (stats[*c].playouts > best_playouts) {
547 best_playouts = stats[*c].playouts;
548 best_move = *c;
550 coord_done(c);
551 r = strchr(r, '\n');
554 if (best_stats) *best_stats = stats[best_move];
555 if (total_playouts) *total_playouts = playouts;
556 if (total_threads) *total_threads = threads;
557 if (keep_looking) *keep_looking = keep > reply_count / 2;
558 return best_move;
561 /* Time control is mostly done by the slaves, so we use default values here. */
562 #define FUSEKI_END 20
563 #define YOSE_START 40
565 static coord_t *
566 distributed_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive)
568 struct distributed *dist = e->data;
569 double now = time_now();
570 double first = now;
572 char *cmd = pass_all_alive ? "pachi-genmoves_cleanup" : "pachi-genmoves";
573 char args[CMDS_SIZE];
574 char *end = args + sizeof(args);
576 coord_t best;
577 int playouts, threads;
578 struct move_stats best_stats;
580 if (ti->period == TT_NULL) *ti = default_ti;
581 struct time_stop stop;
582 time_stop_conditions(ti, b, FUSEKI_END, YOSE_START, &stop);
583 struct time_info saved_ti = *ti;
585 /* Send the first genmoves. This is
586 * a multi-line command ending with \n\n. */
587 char *col = args + snprintf(args, sizeof(args), "%s", stone2str(color));
588 char *s = col;
589 if (ti->dim == TD_WALLTIME) {
590 s += snprintf(s, end - s, " %.3f %.3f %d %d",
591 ti->len.t.main_time, ti->len.t.byoyomi_time,
592 ti->len.t.byoyomi_periods, ti->len.t.byoyomi_stones);
594 s += snprintf(s, end - s, "\n\n");
596 pthread_mutex_lock(&slave_lock);
597 new_cmd(b, cmd, args);
599 /* Loop until most slaves want to quit or time elapsed. */
600 for (;;) {
601 double start = now;
602 get_replies(now + STATS_UPDATE_INTERVAL, 0, b);
603 now = time_now();
604 s = col;
605 if (ti->dim == TD_WALLTIME) {
606 time_sub(ti, now - start);
607 s += snprintf(s, end - s, " %.3f %.3f %d %d",
608 ti->len.t.main_time, ti->len.t.byoyomi_time,
609 ti->len.t.byoyomi_periods, ti->len.t.byoyomi_stones);
611 s += snprintf(s, end - s, "\n");
612 bool keep_looking;
613 best = select_best_move(b, &best_stats, &playouts,
614 &threads, &keep_looking);
616 if (!keep_looking) break;
617 if (ti->dim == TD_WALLTIME) {
618 if (now - ti->len.t.timer_start >= stop.worst.time) break;
619 } else {
620 if (playouts >= stop.worst.playouts) break;
622 if (DEBUGL(2)) {
623 char buf[BSIZE];
624 char *coord = coord2sstr(best, b);
625 snprintf(buf, sizeof(buf),
626 "temp winner is %s %s with score %1.4f (%d/%d games)"
627 " %d slaves %d threads\n",
628 stone2str(color), coord, get_value(best_stats.value, color),
629 best_stats.playouts, playouts, reply_count, threads);
630 logline(NULL, "* ", buf);
632 update_cmd(b, cmd, args);
634 int replies = reply_count;
636 /* Do not subtract time spent twice (see gtp_parse). */
637 *ti = saved_ti;
639 dist->my_last_move.color = color;
640 dist->my_last_move.coord = best;
641 dist->my_last_stats = best_stats;
643 /* Tell the slaves to commit to the selected move, overwriting
644 * the last "pachi-genmoves" in the command history. */
645 char *coord = coord2str(best, b);
646 snprintf(args, sizeof(args), "%s %s\n", stone2str(color), coord);
647 update_cmd(b, "play", args);
648 pthread_mutex_unlock(&slave_lock);
650 if (DEBUGL(1)) {
651 char buf[BSIZE];
652 double time = now - first + 0.000001; /* avoid divide by zero */
653 snprintf(buf, sizeof(buf),
654 "GLOBAL WINNER is %s %s with score %1.4f (%d/%d games)\n"
655 "genmove in %0.2fs %d slaves %d threads (%d games/s,"
656 " %d games/s/slave, %d games/s/thread)\n",
657 stone2str(color), coord, get_value(best_stats.value, color),
658 best_stats.playouts, playouts, time, replies, threads,
659 (int)(playouts/time), (int)(playouts/time/replies),
660 (int)(playouts/time/threads));
661 logline(NULL, "* ", buf);
663 free(coord);
664 return coord_copy(best);
667 static char *
668 distributed_chat(struct engine *e, struct board *b, char *cmd)
670 struct distributed *dist = e->data;
671 static char reply[BSIZE];
673 cmd += strspn(cmd, " \n\t");
674 if (!strncasecmp(cmd, "winrate", 7)) {
675 enum stone color = dist->my_last_move.color;
676 snprintf(reply, BSIZE, "In %d playouts at %d machines, %s %s can win with %.2f%% probability.",
677 dist->my_last_stats.playouts, active_slaves, stone2str(color),
678 coord2sstr(dist->my_last_move.coord, b),
679 100 * get_value(dist->my_last_stats.value, color));
680 return reply;
682 return NULL;
/* qsort() comparison callback for an array of C strings:
 * p1 and p2 point to the array elements (char *), which are
 * compared ignoring case. */
static int
scmp(const void *p1, const void *p2)
{
	const char *lhs = *(char * const *)p1;
	const char *rhs = *(char * const *)p2;
	return strcasecmp(lhs, rhs);
}
691 static void
692 distributed_dead_group_list(struct engine *e, struct board *b, struct move_queue *mq)
694 pthread_mutex_lock(&slave_lock);
696 new_cmd(b, "final_status_list", "dead\n");
697 get_replies(time_now() + MAX_FAST_CMD_WAIT, 0, b);
699 /* Find the most popular reply. */
700 qsort(gtp_replies, reply_count, sizeof(char *), scmp);
701 int best_reply = 0;
702 int best_count = 1;
703 int count = 1;
704 for (int reply = 1; reply < reply_count; reply++) {
705 if (!strcmp(gtp_replies[reply], gtp_replies[reply-1])) {
706 count++;
707 } else {
708 count = 1;
710 if (count > best_count) {
711 best_count = count;
712 best_reply = reply;
716 /* Pick the first move of each line as group. */
717 char *dead = gtp_replies[best_reply];
718 dead = strchr(dead, ' '); // skip "id "
719 while (dead && *++dead != '\n') {
720 coord_t *c = str2coord(dead, board_size(b));
721 mq_add(mq, *c);
722 coord_done(c);
723 dead = strchr(dead, '\n');
725 pthread_mutex_unlock(&slave_lock);
728 static struct distributed *
729 distributed_state_init(char *arg, struct board *b)
731 struct distributed *dist = calloc(1, sizeof(struct distributed));
733 dist->max_slaves = 100;
734 if (arg) {
735 char *optspec, *next = arg;
736 while (*next) {
737 optspec = next;
738 next += strcspn(next, ",");
739 if (*next) { *next++ = 0; } else { *next = 0; }
741 char *optname = optspec;
742 char *optval = strchr(optspec, '=');
743 if (optval) *optval++ = 0;
745 if (!strcasecmp(optname, "slave_port") && optval) {
746 dist->slave_port = strdup(optval);
747 } else if (!strcasecmp(optname, "proxy_port") && optval) {
748 dist->proxy_port = strdup(optval);
749 } else if (!strcasecmp(optname, "max_slaves") && optval) {
750 dist->max_slaves = atoi(optval);
751 } else if (!strcasecmp(optname, "slaves_quit")) {
752 dist->slaves_quit = !optval || atoi(optval);
753 } else {
754 fprintf(stderr, "distributed: Invalid engine argument %s or missing value\n", optname);
759 gtp_replies = calloc(dist->max_slaves, sizeof(char *));
761 if (!dist->slave_port) {
762 fprintf(stderr, "distributed: missing slave_port\n");
763 exit(1);
765 int slave_sock = port_listen(dist->slave_port, dist->max_slaves);
766 pthread_t thread;
767 for (int id = 0; id < dist->max_slaves; id++) {
768 pthread_create(&thread, NULL, slave_thread, (void *)(long)slave_sock);
771 if (dist->proxy_port) {
772 int proxy_sock = port_listen(dist->proxy_port, dist->max_slaves);
773 for (int id = 0; id < dist->max_slaves; id++) {
774 pthread_create(&thread, NULL, proxy_thread, (void *)(long)proxy_sock);
777 return dist;
780 struct engine *
781 engine_distributed_init(char *arg, struct board *b)
783 start_time = time_now();
784 struct distributed *dist = distributed_state_init(arg, b);
785 struct engine *e = calloc(1, sizeof(struct engine));
786 e->name = "Distributed Engine";
787 e->comment = "I'm playing the distributed engine. When I'm losing, I will resign, "
788 "if I think I win, I play until you pass. "
789 "Anyone can send me 'winrate' in private chat to get my assessment of the position.";
790 e->notify = distributed_notify;
791 e->genmove = distributed_genmove;
792 e->dead_group_list = distributed_dead_group_list;
793 e->chat = distributed_chat;
794 e->data = dist;
795 // Keep the threads and the open socket connections:
796 e->keep_on_clear = true;
798 return e;