slave_loop(): Add function process_reply() to clean the code.
[pachi.git] / distributed / distributed.c
blob293559b595532642f2fe03386a73c407439c904d
1 /* This is a master for the "distributed" engine. It receives connections
2 * from slave machines, sends them gtp commands, then aggregates the
3 * results. It can also act as a proxy for the logs of all slave machines.
4 * The slave machines must run with engine "uct" (not "distributed").
5 * The master sends pachi-genmoves gtp commands regularly to each slave,
6 * gets as replies a list of candidate moves, their number of playouts
7 * and their value. The master then picks the most popular move. */
9 /* With time control, the master waits for all slaves, except
10 * when the allowed time is already passed. In this case the
11 * master picks among the available replies, or waits for just
12 * one reply if there is none yet.
13 * Without time control, the master waits until the desired
14 * number of games have been simulated. In this case the -t
15 * parameter for the master should be the sum of the parameters
16 * for all slaves. */
18 /* The master sends updated statistics for the best moves
19 * in each genmoves command. In this version only the
20 * children of the root node are updated. The slaves
21 * reply with just their own stats; they remember what was
22 * previously received from or sent to the master, to
23 * distinguish their own contribution from that of other slaves. */
25 /* The master-slave protocol has fault tolerance. If a slave is
26 * out of sync, the master sends it the appropriate command history. */
28 /* Pass me arguments like a=b,c=d,...
29 * Supported arguments:
30 * slave_port=SLAVE_PORT slaves connect to this port; this parameter is mandatory.
31 * max_slaves=MAX_SLAVES default 100
32 * slaves_quit=0|1 quit gtp command also sent to slaves, default false.
33 * proxy_port=PROXY_PORT slaves optionally send their logs to this port.
34 * Warning: with proxy_port, the master stderr mixes the logs of all
35 * machines but you can separate them again:
36 * slave logs: sed -n '/< .*:/s/.*< /< /p' logfile
37 * master logs: perl -0777 -pe 's/<[ <].*:.*\n//g' logfile
40 /* A configuration without proxy would have one master run on masterhost as:
41 * zzgo -e distributed slave_port=1234
42 * and N slaves running as:
43 * zzgo -e uct -g masterhost:1234 slave
44 * With log proxy:
45 * zzgo -e distributed slave_port=1234,proxy_port=1235
46 * zzgo -e uct -g masterhost:1234 -l masterhost:1235 slave
47 * If the master itself runs on a machine other than that running gogui,
48 * gogui-twogtp, kgsGtp or cgosGtp, it can redirect its gtp port:
49 * zzgo -e distributed -g 10000 slave_port=1234,proxy_port=1235
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <limits.h>
#include <ctype.h>
#include <time.h>
#include <alloca.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
65 #define DEBUG
67 #include "board.h"
68 #include "engine.h"
69 #include "move.h"
70 #include "timeinfo.h"
71 #include "network.h"
72 #include "playout.h"
73 #include "random.h"
74 #include "stats.h"
75 #include "mq.h"
76 #include "debug.h"
77 #include "distributed/distributed.h"
79 /* Internal engine state. */
80 struct distributed {
81 char *slave_port;
82 char *proxy_port;
83 int max_slaves;
84 bool slaves_quit;
85 struct move my_last_move;
86 struct move_stats my_last_stats;
89 /* Default number of simulations to perform per move.
90 * Note that this is in total over all slaves! */
91 #define DIST_GAMES 80000
92 static const struct time_info default_ti = {
93 .period = TT_MOVE,
94 .dim = TD_GAMES,
95 .len = { .games = DIST_GAMES },
/* Yield v unchanged when c is S_BLACK, and 1 - v for any other color. */
#define get_value(v, c) \
	((c) == S_BLACK ? (v) : 1 - (v))

/* Maximum size of one line of reply or of slave log. */
#define BSIZE 4096

/* Maximum size of all gtp commands for one game:
 * 60 chars for the first line of each genmoves, plus 100 lines
 * of 30 chars each for the stats sent at the last move. */
#define CMDS_SIZE (60*MAX_GAMELEN + 30*100)
109 /* All gtp commands for current game separated by \n */
110 static char gtp_cmds[CMDS_SIZE];
112 /* Latest gtp command sent to slaves. */
113 static char *gtp_cmd = NULL;
115 /* Slaves send gtp_cmd when cmd_count changes. */
116 static int cmd_count = 0;
118 /* Remember at most 12 gtp ids per move: play pass,
119 * 10 genmoves (1s), play pass.
120 * For move 0 we always resend the whole history. */
121 #define MAX_CMDS_PER_MOVE 12
123 /* History of gtp commands sent for current game, indexed by move. */
124 static int id_history[MAX_GAMELEN][MAX_CMDS_PER_MOVE];
125 static char *cmd_history[MAX_GAMELEN][MAX_CMDS_PER_MOVE];
127 /* Number of active slave machines working for this master. */
128 static int active_slaves = 0;
130 /* Number of replies to last gtp command already received. */
131 static int reply_count = 0;
133 /* All replies to latest gtp command are in gtp_replies[0..reply_count-1]. */
134 static char **gtp_replies;
136 /* Mutex protecting gtp_cmds, gtp_cmd, id_history, cmd_history,
137 * cmd_count, active_slaves, reply_count & gtp_replies */
138 static pthread_mutex_t slave_lock = PTHREAD_MUTEX_INITIALIZER;
140 /* Condition signaled when a new gtp command is available. */
141 static pthread_cond_t cmd_cond = PTHREAD_COND_INITIALIZER;
143 /* Condition signaled when reply_count increases. */
144 static pthread_cond_t reply_cond = PTHREAD_COND_INITIALIZER;
146 /* Mutex protecting stderr. Must not be held at same time as slave_lock. */
147 static pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;
149 /* Absolute time when this program was started.
150 * For debugging only. */
151 static double start_time;
153 /* Write the time, client address, prefix, and string s to stderr atomically.
154 * s should end with a \n */
155 static void
156 logline(struct in_addr *client, char *prefix, char *s)
158 double now = time_now();
159 char addr[INET_ADDRSTRLEN];
160 if (client) {
161 inet_ntop(AF_INET, client, addr, sizeof(addr));
162 } else {
163 addr[0] = '\0';
165 pthread_mutex_lock(&log_lock);
166 fprintf(stderr, "%s%15s %9.3f: %s", prefix, addr, now - start_time, s);
167 pthread_mutex_unlock(&log_lock);
170 /* Thread opening a connection on the given socket and copying input
171 * from there to stderr. */
172 static void *
173 proxy_thread(void *arg)
175 int proxy_sock = (long)arg;
176 assert(proxy_sock >= 0);
177 for (;;) {
178 struct in_addr client;
179 int conn = open_server_connection(proxy_sock, &client);
180 FILE *f = fdopen(conn, "r");
181 char buf[BSIZE];
182 while (fgets(buf, BSIZE, f)) {
183 logline(&client, "< ", buf);
185 fclose(f);
189 /* Get a reply to one gtp command. Return the gtp command id,
190 * or -1 if error. reply must have at least CMDS_SIZE bytes.
191 * slave_lock is not held on either entry or exit of this function. */
192 static int
193 get_reply(FILE *f, struct in_addr client, char *reply)
195 int reply_id = -1;
196 *reply = '\0';
197 char *line = reply;
198 while (fgets(line, reply + CMDS_SIZE - line, f) && *line != '\n') {
199 if (DEBUGL(3) || (DEBUGL(2) && line == reply))
200 logline(&client, "<<", line);
201 if (reply_id < 0 && (*line == '=' || *line == '?') && isdigit(line[1]))
202 reply_id = atoi(line+1);
203 line += strlen(line);
205 if (*line != '\n') return -1;
206 return reply_id;
209 /* Send one gtp command and get a reply from the slave machine.
210 * Write the reply in buf which must have at least CMDS_SIZE bytes.
211 * Return the gtp command id, or -1 if error.
212 * slave_lock is held on both entry and exit of this function. */
213 static int
214 send_command(char *to_send, FILE *f, struct in_addr client, char *buf)
216 assert(to_send && gtp_cmd);
217 strncpy(buf, to_send, CMDS_SIZE);
218 bool resend = to_send != gtp_cmd;
220 pthread_mutex_unlock(&slave_lock);
222 if (DEBUGL(1) && resend)
223 logline(&client, "? ",
224 to_send == gtp_cmds ? "resend all\n" : "partial resend\n");
225 fputs(buf, f);
226 fflush(f);
227 if (DEBUGL(2)) {
228 if (!DEBUGL(3)) {
229 char *s = strchr(buf, '\n');
230 if (s) s[1] = '\0';
232 logline(&client, ">>", buf);
235 int reply_id = get_reply(f, client, buf);
237 pthread_mutex_lock(&slave_lock);
238 return reply_id;
241 /* Process the reply received from a slave machine.
242 * Copy it to reply_buf and return NULL if ok, or return
243 * the command to be sent again if the slave is out of sync.
244 * slave_lock is held on both entry and exit of this function. */
245 static char *
246 process_reply(int reply_id, char *reply, char *reply_buf,
247 int *last_reply_id, int *reply_slot)
249 /* Make sure we are still in sync. cmd_count may have
250 * changed but the reply is valid as long as cmd_id didn't
251 * change (this only occurs for consecutive genmoves). */
252 int cmd_id = atoi(gtp_cmd);
253 if (reply_id == cmd_id && *reply == '=') {
254 strncpy(reply_buf, reply, CMDS_SIZE);
255 if (reply_id != *last_reply_id)
256 *reply_slot = reply_count++;
257 gtp_replies[*reply_slot] = reply_buf;
258 *last_reply_id = reply_id;
260 pthread_cond_signal(&reply_cond);
261 return NULL;
263 /* Resend everything if slave got latest command,
264 * but doesn't have a correct board. */
265 if (reply_id == cmd_id) return gtp_cmds;
267 /* The slave is ouf-of-sync. Check whether the last command
268 * it received belongs to the current game. If so resend
269 * starting at the last move known by slave, otherwise
270 * resend the whole history. */
271 int reply_move = move_number(reply_id);
272 if (reply_move > move_number(cmd_id)) return gtp_cmds;
274 for (int slot = 0; slot < MAX_CMDS_PER_MOVE; slot++) {
275 if (reply_id == id_history[reply_move][slot]) {
276 return cmd_history[reply_move][slot];
279 return gtp_cmds;
282 /* Main loop of a slave thread.
283 * Send the current command to the slave machine and wait for a reply.
284 * Resend command history if the slave machine is out of sync.
285 * Returns when the connection with the slave machine is cut.
286 * slave_lock is held on both entry and exit of this function. */
287 static void
288 slave_loop(FILE *f, struct in_addr client, char *reply_buf, bool resend)
290 char *to_send = gtp_cmd;
291 int last_cmd_sent = 0;
292 int last_reply_id = -1;
293 int reply_slot = -1;
294 for (;;) {
295 while (last_cmd_sent == cmd_count && !resend) {
296 // Wait for a new gtp command.
297 pthread_cond_wait(&cmd_cond, &slave_lock);
298 to_send = gtp_cmd;
301 /* Command available, send it to slave machine.
302 * If slave was out of sync, send the history. */
303 char buf[CMDS_SIZE];
304 last_cmd_sent = cmd_count;
306 /* Send the command and get the reply, which always ends with \n\n
307 * The slave machine sends "=id reply" or "?id reply"
308 * with id == cmd_id if it is in sync. */
309 int reply_id = send_command(to_send, f, client, buf);
310 if (reply_id == -1) return;
312 to_send = process_reply(reply_id, buf, reply_buf,
313 &last_reply_id, &reply_slot);
314 if (!to_send) {
315 /* Good reply. Force waiting for a new command.
316 * The next genmoves stats we send must include those
317 * just received (this is assumed by the slave). */
318 last_cmd_sent = cmd_count;
319 resend = false;
320 continue;
322 resend = true;
326 /* Thread sending gtp commands to one slave machine, and
327 * reading replies. If a slave machine dies, this thread waits
328 * for a connection from another slave. */
329 static void *
330 slave_thread(void *arg)
332 int slave_sock = (long)arg;
333 assert(slave_sock >= 0);
334 char reply_buf[CMDS_SIZE];
335 bool resend = false;
337 for (;;) {
338 /* Wait for a connection from any slave. */
339 struct in_addr client;
340 int conn = open_server_connection(slave_sock, &client);
342 FILE *f = fdopen(conn, "r+");
343 if (DEBUGL(2))
344 logline(&client, "= ", "new slave\n");
346 /* Minimal check of the slave identity. */
347 fputs("name\n", f);
348 if (!fgets(reply_buf, sizeof(reply_buf), f)
349 || strncasecmp(reply_buf, "= Pachi", 7)
350 || !fgets(reply_buf, sizeof(reply_buf), f)
351 || strcmp(reply_buf, "\n")) {
352 logline(&client, "? ", "bad slave\n");
353 fclose(f);
354 continue;
357 pthread_mutex_lock(&slave_lock);
358 active_slaves++;
359 slave_loop(f, client, reply_buf, resend);
361 assert(active_slaves > 0);
362 active_slaves--;
363 // Unblock main thread if it was waiting for this slave.
364 pthread_cond_signal(&reply_cond);
365 pthread_mutex_unlock(&slave_lock);
367 resend = true;
368 if (DEBUGL(2))
369 logline(&client, "= ", "lost slave\n");
370 fclose(f);
374 /* Create a new gtp command for all slaves. The slave lock is held
375 * upon entry and upon return, so the command will actually be
376 * sent when the lock is released. The last command is overwritten
377 * if gtp_cmd points to a non-empty string. cmd is a single word;
378 * args has all arguments and is empty or has a trailing \n */
379 static void
380 update_cmd(struct board *b, char *cmd, char *args, bool new_id)
382 assert(gtp_cmd);
383 /* To make sure the slaves are in sync, we ignore the original id
384 * and use the board number plus some random bits as gtp id. */
385 static int gtp_id = -1;
386 int moves = is_reset(cmd) ? 0 : b->moves;
387 if (new_id) {
388 /* fast_random() is 16-bit only so the multiplication can't overflow. */
389 gtp_id = force_reply(moves + fast_random(65535) * DIST_GAMELEN);
390 reply_count = 0;
392 snprintf(gtp_cmd, gtp_cmds + CMDS_SIZE - gtp_cmd, "%d %s %s",
393 gtp_id, cmd, *args ? args : "\n");
394 cmd_count++;
396 /* Remember history for out-of-sync slaves. */
397 static int slot = 0;
398 slot = (slot + 1) % MAX_CMDS_PER_MOVE;
399 id_history[moves][slot] = gtp_id;
400 cmd_history[moves][slot] = gtp_cmd;
402 // Notify the slave threads about the new command.
403 pthread_cond_broadcast(&cmd_cond);
406 /* Update the command history, then create a new gtp command
407 * for all slaves. The slave lock is held upon entry and
408 * upon return, so the command will actually be sent when the
409 * lock is released. cmd is a single word; args has all
410 * arguments and is empty or has a trailing \n */
411 static void
412 new_cmd(struct board *b, char *cmd, char *args)
414 // Clear the history when a new game starts:
415 if (!gtp_cmd || is_gamestart(cmd)) {
416 gtp_cmd = gtp_cmds;
417 } else {
418 /* Preserve command history for new slaves.
419 * To indicate that the slave should only reply to
420 * the last command we force the id of previous
421 * commands to be just the move number. */
422 int id = prevent_reply(atoi(gtp_cmd));
423 int len = strspn(gtp_cmd, "0123456789");
424 char buf[32];
425 snprintf(buf, sizeof(buf), "%0*d", len, id);
426 memcpy(gtp_cmd, buf, len);
428 gtp_cmd += strlen(gtp_cmd);
431 // Let the slave threads send the new gtp command:
432 update_cmd(b, cmd, args, true);
435 /* Wait for at least one new reply. Return when all slaves have
436 * replied, or when the given absolute time is passed.
437 * The replies are returned in gtp_replies[0..reply_count-1]
438 * slave_lock is held on entry and on return. */
439 static void
440 get_replies(double time_limit)
442 for (;;) {
443 if (reply_count > 0) {
444 struct timespec ts;
445 double sec;
446 ts.tv_nsec = (int)(modf(time_limit, &sec)*1000000000.0);
447 ts.tv_sec = (int)sec;
448 pthread_cond_timedwait(&reply_cond, &slave_lock, &ts);
449 } else {
450 pthread_cond_wait(&reply_cond, &slave_lock);
452 if (reply_count == 0) continue;
453 if (reply_count >= active_slaves) return;
454 if (time_now() >= time_limit) break;
456 if (DEBUGL(1)) {
457 char buf[1024];
458 snprintf(buf, sizeof(buf),
459 "get_replies timeout %.3f >= %.3f, replies %d < active %d\n",
460 time_now() - start_time, time_limit - start_time,
461 reply_count, active_slaves);
462 logline(NULL, "? ", buf);
464 assert(reply_count > 0);
/* Maximum time (seconds) to wait for answers to fast gtp commands
 * (all commands except pachi-genmoves and final_status_list). */
#define MAX_FAST_CMD_WAIT 1.0

/* How often to send a stats update to slaves (seconds) */
#define STATS_UPDATE_INTERVAL 0.1 /* 100ms */
478 /* Dispatch a new gtp command to all slaves.
479 * The slave lock must not be held upon entry and is released upon return.
480 * args is empty or ends with '\n' */
481 static enum parse_code
482 distributed_notify(struct engine *e, struct board *b, int id, char *cmd, char *args, char **reply)
484 struct distributed *dist = e->data;
486 /* Commands that should not be sent to slaves.
487 * time_left will be part of next pachi-genmoves,
488 * we reduce latency by not forwarding it here. */
489 if ((!strcasecmp(cmd, "quit") && !dist->slaves_quit)
490 || !strcasecmp(cmd, "uct_genbook")
491 || !strcasecmp(cmd, "uct_dumpbook")
492 || !strcasecmp(cmd, "kgs-chat")
493 || !strcasecmp(cmd, "time_left")
495 /* and commands that will be sent to slaves later */
496 || !strcasecmp(cmd, "genmove")
497 || !strcasecmp(cmd, "kgs-genmove_cleanup")
498 || !strcasecmp(cmd, "final_score")
499 || !strcasecmp(cmd, "final_status_list"))
500 return P_OK;
502 pthread_mutex_lock(&slave_lock);
504 // Create a new command to be sent by the slave threads.
505 new_cmd(b, cmd, args);
507 /* Wait for replies here. If we don't wait, we run the
508 * risk of getting out of sync with most slaves and
509 * sending command history too frequently. */
510 get_replies(time_now() + MAX_FAST_CMD_WAIT);
512 pthread_mutex_unlock(&slave_lock);
513 return P_OK;
516 /* genmoves returns a line "=id played_own total_playouts threads keep_looking[ reserved]"
517 * then a list of lines "coord playouts value amaf_playouts amaf_value".
518 * Return the move with most playouts, and additional stats.
519 * Keep this code in sync with uct/slave.c:report_stats().
520 * slave_lock is held on entry and on return. */
521 static coord_t
522 select_best_move(struct board *b, struct move_stats2 *stats, int *played,
523 int *total_playouts, int *total_threads, bool *keep_looking)
525 assert(reply_count > 0);
527 /* +2 for pass and resign */
528 memset(stats-2, 0, (board_size2(b)+2) * sizeof(*stats));
530 coord_t best_move = pass;
531 int best_playouts = -1;
532 *played = 0;
533 *total_playouts = 0;
534 *total_threads = 0;
535 int keep = 0;
537 for (int reply = 0; reply < reply_count; reply++) {
538 char *r = gtp_replies[reply];
539 int id, o, p, t, k;
540 if (sscanf(r, "=%d %d %d %d %d", &id, &o, &p, &t, &k) != 5) continue;
541 *played += o;
542 *total_playouts += p;
543 *total_threads += t;
544 keep += k;
545 // Skip the rest of the firt line if any (allow future extensions)
546 r = strchr(r, '\n');
548 char move[64];
549 struct move_stats2 s;
550 while (r && sscanf(++r, "%63s %d %f %d %f", move, &s.u.playouts,
551 &s.u.value, &s.amaf.playouts, &s.amaf.value) == 5) {
552 coord_t *c = str2coord(move, board_size(b));
553 stats_add_result(&stats[*c].u, s.u.value, s.u.playouts);
554 stats_add_result(&stats[*c].amaf, s.amaf.value, s.amaf.playouts);
556 if (stats[*c].u.playouts > best_playouts) {
557 best_playouts = stats[*c].u.playouts;
558 best_move = *c;
560 coord_done(c);
561 r = strchr(r, '\n');
564 *keep_looking = keep > reply_count / 2;
565 return best_move;
568 /* Set the args for the genmoves command. If stats is not null,
569 * append the stats from all slaves above min_playouts, except
570 * for pass and resign. args must have CMDS_SIZE bytes and
571 * upon return ends with an empty line.
572 * Keep this code in sync with uct_genmoves().
573 * slave_lock is held on entry and on return. */
574 static void
575 genmoves_args(char *args, struct board *b, enum stone color, int played,
576 struct time_info *ti, struct move_stats2 *stats, int min_playouts)
578 char *end = args + CMDS_SIZE;
579 char *s = args + snprintf(args, CMDS_SIZE, "%s %d", stone2str(color), played);
581 if (ti->dim == TD_WALLTIME) {
582 s += snprintf(s, end - s, " %.3f %.3f %d %d",
583 ti->len.t.main_time, ti->len.t.byoyomi_time,
584 ti->len.t.byoyomi_periods, ti->len.t.byoyomi_stones);
586 s += snprintf(s, end - s, "\n");
587 if (stats) {
588 foreach_point(b) {
589 if (stats[c].u.playouts <= min_playouts) continue;
590 s += snprintf(s, end - s, "%s %d %.7f %d %.7f\n",
591 coord2sstr(c, b),
592 stats[c].u.playouts, stats[c].u.value,
593 stats[c].amaf.playouts, stats[c].amaf.value);
594 } foreach_point_end;
596 s += snprintf(s, end - s, "\n");
599 /* Time control is mostly done by the slaves, so we use default values here. */
600 #define FUSEKI_END 20
601 #define YOSE_START 40
603 static coord_t *
604 distributed_genmove(struct engine *e, struct board *b, struct time_info *ti,
605 enum stone color, bool pass_all_alive)
607 struct distributed *dist = e->data;
608 double now = time_now();
609 double first = now;
611 char *cmd = pass_all_alive ? "pachi-genmoves_cleanup" : "pachi-genmoves";
612 char args[CMDS_SIZE];
614 coord_t best;
615 int played, playouts, threads;
617 if (ti->period == TT_NULL) *ti = default_ti;
618 struct time_stop stop;
619 time_stop_conditions(ti, b, FUSEKI_END, YOSE_START, &stop);
620 struct time_info saved_ti = *ti;
622 /* Send the first genmoves without stats. */
623 genmoves_args(args, b, color, 0, ti, NULL, 0);
625 /* Combined move stats from all slaves, only for children
626 * of the root node, plus 2 for pass and resign. */
627 struct move_stats2 *stats = alloca((board_size2(b)+2) * sizeof(struct move_stats2));
628 stats += 2;
630 pthread_mutex_lock(&slave_lock);
631 new_cmd(b, cmd, args);
633 /* Loop until most slaves want to quit or time elapsed. */
634 for (;;) {
635 double start = now;
636 get_replies(now + STATS_UPDATE_INTERVAL);
637 now = time_now();
638 if (ti->dim == TD_WALLTIME)
639 time_sub(ti, now - start);
641 bool keep_looking;
642 best = select_best_move(b, stats, &played, &playouts, &threads, &keep_looking);
644 if (!keep_looking) break;
645 if (ti->dim == TD_WALLTIME) {
646 if (now - ti->len.t.timer_start >= stop.worst.time) break;
647 } else {
648 if (played >= stop.worst.playouts) break;
650 if (DEBUGL(2)) {
651 char buf[BSIZE];
652 char *coord = coord2sstr(best, b);
653 snprintf(buf, sizeof(buf),
654 "temp winner is %s %s with score %1.4f (%d/%d games)"
655 " %d slaves %d threads\n",
656 stone2str(color), coord, get_value(stats[best].u.value, color),
657 stats[best].u.playouts, playouts, reply_count, threads);
658 logline(NULL, "* ", buf);
660 /* Send the command with the same gtp id, to avoid discarding
661 * a reply to a previous genmoves at the same move. */
662 genmoves_args(args, b, color, played, ti, stats, stats[best].u.playouts / 100);
663 update_cmd(b, cmd, args, false);
665 int replies = reply_count;
667 /* Do not subtract time spent twice (see gtp_parse). */
668 *ti = saved_ti;
670 dist->my_last_move.color = color;
671 dist->my_last_move.coord = best;
672 dist->my_last_stats = stats[best].u;
674 /* Tell the slaves to commit to the selected move, overwriting
675 * the last "pachi-genmoves" in the command history. */
676 char *coord = coord2str(best, b);
677 snprintf(args, sizeof(args), "%s %s\n", stone2str(color), coord);
678 update_cmd(b, "play", args, true);
679 pthread_mutex_unlock(&slave_lock);
681 if (DEBUGL(1)) {
682 char buf[BSIZE];
683 double time = now - first + 0.000001; /* avoid divide by zero */
684 snprintf(buf, sizeof(buf),
685 "GLOBAL WINNER is %s %s with score %1.4f (%d/%d games)\n"
686 "genmove %d games in %0.2fs %d slaves %d threads (%d games/s,"
687 " %d games/s/slave, %d games/s/thread)\n",
688 stone2str(color), coord, get_value(stats[best].u.value, color),
689 stats[best].u.playouts, playouts, played, time, replies, threads,
690 (int)(played/time), (int)(played/time/replies),
691 (int)(played/time/threads));
692 logline(NULL, "* ", buf);
694 free(coord);
695 return coord_copy(best);
698 static char *
699 distributed_chat(struct engine *e, struct board *b, char *cmd)
701 struct distributed *dist = e->data;
702 static char reply[BSIZE];
704 cmd += strspn(cmd, " \n\t");
705 if (!strncasecmp(cmd, "winrate", 7)) {
706 enum stone color = dist->my_last_move.color;
707 snprintf(reply, BSIZE, "In %d playouts at %d machines, %s %s can win with %.2f%% probability.",
708 dist->my_last_stats.playouts, active_slaves, stone2str(color),
709 coord2sstr(dist->my_last_move.coord, b),
710 100 * get_value(dist->my_last_stats.value, color));
711 return reply;
713 return NULL;
/* qsort() comparison function for an array of strings: p1 and p2
 * point to the array elements (char *), which are compared ignoring
 * case. */
static int
scmp(const void *p1, const void *p2)
{
	const char *left = *(char * const *)p1;
	const char *right = *(char * const *)p2;
	return strcasecmp(left, right);
}
722 static void
723 distributed_dead_group_list(struct engine *e, struct board *b, struct move_queue *mq)
725 pthread_mutex_lock(&slave_lock);
727 new_cmd(b, "final_status_list", "dead\n");
728 get_replies(time_now() + MAX_FAST_CMD_WAIT);
730 /* Find the most popular reply. */
731 qsort(gtp_replies, reply_count, sizeof(char *), scmp);
732 int best_reply = 0;
733 int best_count = 1;
734 int count = 1;
735 for (int reply = 1; reply < reply_count; reply++) {
736 if (!strcmp(gtp_replies[reply], gtp_replies[reply-1])) {
737 count++;
738 } else {
739 count = 1;
741 if (count > best_count) {
742 best_count = count;
743 best_reply = reply;
747 /* Pick the first move of each line as group. */
748 char *dead = gtp_replies[best_reply];
749 dead = strchr(dead, ' '); // skip "id "
750 while (dead && *++dead != '\n') {
751 coord_t *c = str2coord(dead, board_size(b));
752 mq_add(mq, *c);
753 coord_done(c);
754 dead = strchr(dead, '\n');
756 pthread_mutex_unlock(&slave_lock);
759 static struct distributed *
760 distributed_state_init(char *arg, struct board *b)
762 struct distributed *dist = calloc2(1, sizeof(struct distributed));
764 dist->max_slaves = 100;
765 if (arg) {
766 char *optspec, *next = arg;
767 while (*next) {
768 optspec = next;
769 next += strcspn(next, ",");
770 if (*next) { *next++ = 0; } else { *next = 0; }
772 char *optname = optspec;
773 char *optval = strchr(optspec, '=');
774 if (optval) *optval++ = 0;
776 if (!strcasecmp(optname, "slave_port") && optval) {
777 dist->slave_port = strdup(optval);
778 } else if (!strcasecmp(optname, "proxy_port") && optval) {
779 dist->proxy_port = strdup(optval);
780 } else if (!strcasecmp(optname, "max_slaves") && optval) {
781 dist->max_slaves = atoi(optval);
782 } else if (!strcasecmp(optname, "slaves_quit")) {
783 dist->slaves_quit = !optval || atoi(optval);
784 } else {
785 fprintf(stderr, "distributed: Invalid engine argument %s or missing value\n", optname);
790 gtp_replies = calloc2(dist->max_slaves, sizeof(char *));
792 if (!dist->slave_port) {
793 fprintf(stderr, "distributed: missing slave_port\n");
794 exit(1);
796 int slave_sock = port_listen(dist->slave_port, dist->max_slaves);
797 pthread_t thread;
798 for (int id = 0; id < dist->max_slaves; id++) {
799 pthread_create(&thread, NULL, slave_thread, (void *)(long)slave_sock);
802 if (dist->proxy_port) {
803 int proxy_sock = port_listen(dist->proxy_port, dist->max_slaves);
804 for (int id = 0; id < dist->max_slaves; id++) {
805 pthread_create(&thread, NULL, proxy_thread, (void *)(long)proxy_sock);
808 return dist;
811 struct engine *
812 engine_distributed_init(char *arg, struct board *b)
814 start_time = time_now();
815 struct distributed *dist = distributed_state_init(arg, b);
816 struct engine *e = calloc2(1, sizeof(struct engine));
817 e->name = "Distributed Engine";
818 e->comment = "I'm playing the distributed engine. When I'm losing, I will resign, "
819 "if I think I win, I play until you pass. "
820 "Anyone can send me 'winrate' in private chat to get my assessment of the position.";
821 e->notify = distributed_notify;
822 e->genmove = distributed_genmove;
823 e->dead_group_list = distributed_dead_group_list;
824 e->chat = distributed_chat;
825 e->data = dist;
826 // Keep the threads and the open socket connections:
827 e->keep_on_clear = true;
829 return e;