Distributed engine: unblock main thread after a slave is lost.
[pachi.git] / distributed / distributed.c
blob99faebd805280bdd156524baf6767a75d81ee766
1 /* This is a master for the "distributed" engine. It receives connections
2 * from slave machines, sends them gtp commands, then aggregates the
3 * results. It can also act as a proxy for the logs of all slave machines.
4 * The slave machines must run with engine "uct" (not "distributed").
5 * The master sends the pachi-genmoves gtp command to each slave,
6 * gets as replies a list of candidate moves, their number of playouts
7 * and their value. The master then picks the most popular move. */
9 /* With time control, the master waits for all slaves, except
10 * when the allowed time is already passed. In this case the
11 * master picks among the available replies, or waits for just
12 * one reply if there is none yet.
13 * Without time control, the master waits until the desired
14 * number of games have been simulated. In this case the -t
15 * parameter for the master should be the sum of the parameters
16 * for all slaves. */
18 /* To minimize the number of ignored replies because they arrive
19 * too late, slaves send temporary replies to the genmoves
20 * command, with the best moves so far. So when the master
21 * has to choose, it should have final replies from most
22 * slaves and at least temporary replies from all of them. */
24 /* This first version does not send tree updates between slaves,
25 * but it has fault tolerance. If a slave is out of sync, the master
26 * sends it the appropriate command history. */
28 /* Pass me arguments like a=b,c=d,...
29 * Supported arguments:
30 * slave_port=SLAVE_PORT slaves connect to this port; this parameter is mandatory.
31 * max_slaves=MAX_SLAVES default 100
32 * slaves_quit=0|1 quit gtp command also sent to slaves, default false.
33 * proxy_port=PROXY_PORT slaves optionally send their logs to this port.
34 * Warning: with proxy_port, the master stderr mixes the logs of all
35 * machines but you can separate them again:
36 * slave logs: sed -n '/< .*:/s/.*< /< /p' logfile
37 * master logs: perl -0777 -pe 's/<[ <].*:.*\n//g' logfile
40 /* A configuration without proxy would have one master run on masterhost as:
41 * zzgo -e distributed slave_port=1234
42 * and N slaves running as:
43 * zzgo -e uct -g masterhost:1234 slave
44 * With log proxy:
45 * zzgo -e distributed slave_port=1234,proxy_port=1235
46 * zzgo -e uct -g masterhost:1234 -l masterhost:1235 slave
47 * If the master itself runs on a machine other than that running gogui,
48 * gogui-twogtp, kgsGtp or cgosGtp, it can redirect its gtp port:
49 * zzgo -e distributed -g 10000 slave_port=1234,proxy_port=1235
52 #include <assert.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <pthread.h>
57 #include <limits.h>
58 #include <ctype.h>
59 #include <time.h>
60 #include <alloca.h>
61 #include <sys/types.h>
62 #include <sys/socket.h>
63 #include <arpa/inet.h>
65 #define DEBUG
67 #include "board.h"
68 #include "engine.h"
69 #include "move.h"
70 #include "timeinfo.h"
71 #include "network.h"
72 #include "playout.h"
73 #include "random.h"
74 #include "stats.h"
75 #include "mq.h"
76 #include "debug.h"
77 #include "distributed/distributed.h"
79 /* Internal engine state. */
80 struct distributed {
81 char *slave_port;
82 char *proxy_port;
83 int max_slaves;
84 bool slaves_quit;
85 struct move my_last_move;
86 struct move_stats my_last_stats;
89 static coord_t select_best_move(struct board *b, struct move_stats *best_stats,
90 int *total_playouts, int *total_threads);
92 /* Default number of simulations to perform per move.
93 * Note that this is in total over all slaves! */
94 #define DIST_GAMES 80000
95 static const struct time_info default_ti = {
96 .period = TT_MOVE,
97 .dim = TD_GAMES,
98 .len = { .games = DIST_GAMES },
101 #define get_value(value, color) \
102 ((color) == S_BLACK ? (value) : 1 - (value))
104 /* Max size for one reply or slave log. */
105 #define BSIZE 4096
107 /* Max size of all gtp commands for one game */
108 #define CMDS_SIZE (40*MAX_GAMELEN)
110 /* All gtp commands for current game separated by \n */
111 static char gtp_cmds[CMDS_SIZE];
113 /* Latest gtp command sent to slaves. */
114 static char *gtp_cmd = NULL;
116 /* Remember at most 3 gtp ids per move (time_left, genmoves, play).
117 * For move 0 there can be more than 3 commands
118 * but then we resend the whole history. */
119 #define MAX_CMDS_PER_MOVE 3
121 /* History of gtp commands sent for current game, indexed by move. */
122 static int id_history[MAX_GAMELEN][MAX_CMDS_PER_MOVE];
123 static char *cmd_history[MAX_GAMELEN][MAX_CMDS_PER_MOVE];
125 /* Number of active slave machines working for this master. */
126 static int active_slaves = 0;
128 /* Number of replies to last gtp command already received. */
129 static int reply_count = 0;
130 static int final_reply_count = 0;
132 /* All replies to latest gtp command are in gtp_replies[0..reply_count-1]. */
133 static char **gtp_replies;
135 /* Mutex protecting gtp_cmds, gtp_cmd, id_history, cmd_history,
136 * active_slaves, reply_count, final_reply_count & gtp_replies */
137 static pthread_mutex_t slave_lock = PTHREAD_MUTEX_INITIALIZER;
139 /* Condition signaled when a new gtp command is available. */
140 static pthread_cond_t cmd_cond = PTHREAD_COND_INITIALIZER;
142 /* Condition signaled when reply_count increases. */
143 static pthread_cond_t reply_cond = PTHREAD_COND_INITIALIZER;
145 /* Mutex protecting stderr. Must not be held at same time as slave_lock. */
146 static pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;
148 /* Absolute time when this program was started.
149 * For debugging only. */
150 static double start_time;
152 /* Write the time, client address, prefix, and string s to stderr atomically.
153 * s should end with a \n */
154 static void
155 logline(struct in_addr *client, char *prefix, char *s)
157 double now = time_now();
158 char addr[INET_ADDRSTRLEN];
159 if (client) {
160 inet_ntop(AF_INET, client, addr, sizeof(addr));
161 } else {
162 addr[0] = '\0';
164 pthread_mutex_lock(&log_lock);
165 fprintf(stderr, "%s%15s %9.3f: %s", prefix, addr, now - start_time, s);
166 pthread_mutex_unlock(&log_lock);
169 /* Thread opening a connection on the given socket and copying input
170 * from there to stderr. */
171 static void *
172 proxy_thread(void *arg)
174 int proxy_sock = (long)arg;
175 assert(proxy_sock >= 0);
176 for (;;) {
177 struct in_addr client;
178 int conn = open_server_connection(proxy_sock, &client);
179 FILE *f = fdopen(conn, "r");
180 char buf[BSIZE];
181 while (fgets(buf, BSIZE, f)) {
182 logline(&client, "< ", buf);
184 fclose(f);
188 /* Get a reply to one gtp command. If we get a temporary
189 * reply, put it in gtp_replies[reply_slot], notify the main
190 * thread, and continue reading until we get a final reply.
191 * Return the gtp command id, or -1 if error.
192 * slave_buf and reply must have at least CMDS_SIZE bytes.
193 * slave_lock is not held on either entry or exit of this function. */
194 static int
195 get_reply(FILE *f, struct in_addr client, char *slave_buf, char *reply, int *reply_slot)
197 int reply_id = -1;
198 *reply_slot = -1;
199 *reply = '\0';
200 char *line = reply;
201 while (fgets(line, reply + CMDS_SIZE - line, f) && *line != '\n') {
202 if (DEBUGL(2))
203 logline(&client, "<<", line);
204 if (reply_id < 0 && (*line == '=' || *line == '?') && isdigit(line[1]))
205 reply_id = atoi(line+1);
206 if (*line == '#') {
207 /* Temporary reply. */
208 line = reply;
209 pthread_mutex_lock(&slave_lock);
210 if (reply_id != atoi(gtp_cmd)) {
211 pthread_mutex_unlock(&slave_lock);
212 continue; // read and discard the rest
214 strncpy(slave_buf, reply, CMDS_SIZE);
215 if (*reply_slot < 0)
216 *reply_slot = reply_count++;
217 gtp_replies[*reply_slot] = slave_buf;
218 pthread_cond_signal(&reply_cond);
219 pthread_mutex_unlock(&slave_lock);
220 } else {
221 line += strlen(line);
224 if (*line != '\n') return -1;
225 return reply_id;
228 /* Main loop of a slave thread.
229 * Send the current command to the slave machine and wait for a reply.
230 * Resend command history if the slave machine is out of sync.
231 * Returns when the connection with the slave machine is cut.
232 * slave_lock is held on both entry and exit of this function. */
233 static void
234 slave_loop(FILE *f, struct in_addr client, char *buf, bool resend)
236 char *to_send = gtp_cmd;
237 int cmd_id = -1;
238 int reply_id = -1;
239 int reply_slot;
240 for (;;) {
241 while (cmd_id == reply_id && !resend) {
242 // Wait for a new gtp command.
243 pthread_cond_wait(&cmd_cond, &slave_lock);
244 if (gtp_cmd)
245 cmd_id = atoi(gtp_cmd);
246 to_send = gtp_cmd;
249 /* Command available, send it to slave machine.
250 * If slave was out of sync, send the history. */
251 assert(to_send && gtp_cmd);
252 strncpy(buf, to_send, CMDS_SIZE);
253 cmd_id = atoi(gtp_cmd);
255 pthread_mutex_unlock(&slave_lock);
257 if (DEBUGL(1) && resend) {
258 if (to_send == gtp_cmds) {
259 logline(&client, "? ", "Slave out-of-sync, resending all history\n");
260 } else {
261 logline(&client, "? ", "Slave behind, partial resend\n");
264 if (DEBUGL(2))
265 logline(&client, ">>", buf);
266 fputs(buf, f);
267 fflush(f);
269 /* Read the reply, which always ends with \n\n
270 * The slave machine sends "=id reply" or "?id reply"
271 * with id == cmd_id if it is in sync. */
272 char reply[CMDS_SIZE];
273 reply_id = get_reply(f, client, buf, reply, &reply_slot);
275 pthread_mutex_lock(&slave_lock);
276 if (reply_id == -1) return;
278 // Make sure we are still in sync:
279 cmd_id = atoi(gtp_cmd);
280 if (reply_id == cmd_id && *reply == '=') {
281 resend = false;
282 strncpy(buf, reply, CMDS_SIZE);
283 final_reply_count++;
284 if (reply_slot < 0)
285 reply_slot = reply_count++;
286 gtp_replies[reply_slot] = buf;
287 pthread_cond_signal(&reply_cond);
288 continue;
290 resend = true;
291 to_send = gtp_cmds;
292 /* Resend everything if slave got latest command,
293 * but doesn't have a correct board. */
294 if (reply_id == cmd_id) continue;
296 /* The slave is ouf-of-sync. Check whether the last command
297 * it received belongs to the current game. If so resend
298 * starting at the last move known by slave, otherwise
299 * resend the whole history. */
300 int reply_move = move_number(reply_id);
301 if (reply_move > move_number(cmd_id)) continue;
303 for (int slot = 0; slot < MAX_CMDS_PER_MOVE; slot++) {
304 if (reply_id == id_history[reply_move][slot]) {
305 to_send = cmd_history[reply_move][slot];
306 break;
312 /* Thread sending gtp commands to one slave machine, and
313 * reading replies. If a slave machine dies, this thread waits
314 * for a connection from another slave. */
315 static void *
316 slave_thread(void *arg)
318 int slave_sock = (long)arg;
319 assert(slave_sock >= 0);
320 char slave_buf[CMDS_SIZE];
321 bool resend = false;
323 for (;;) {
324 /* Wait for a connection from any slave. */
325 struct in_addr client;
326 int conn = open_server_connection(slave_sock, &client);
328 FILE *f = fdopen(conn, "r+");
329 if (DEBUGL(2))
330 logline(&client, "= ", "new slave\n");
332 /* Minimal check of the slave identity. */
333 fputs("name\n", f);
334 if (!fgets(slave_buf, sizeof(slave_buf), f)
335 || strncasecmp(slave_buf, "= Pachi", 7)
336 || !fgets(slave_buf, sizeof(slave_buf), f)
337 || strcmp(slave_buf, "\n")) {
338 logline(&client, "? ", "bad slave\n");
339 fclose(f);
340 continue;
343 pthread_mutex_lock(&slave_lock);
344 active_slaves++;
345 slave_loop(f, client, slave_buf, resend);
347 assert(active_slaves > 0);
348 active_slaves--;
349 // Unblock main thread if it was waiting for this slave.
350 pthread_cond_signal(&reply_cond);
351 pthread_mutex_unlock(&slave_lock);
353 resend = true;
354 if (DEBUGL(2))
355 logline(&client, "= ", "lost slave\n");
356 fclose(f);
360 /* Create a new gtp command for all slaves. The slave lock is held
361 * upon entry and upon return, so the command will actually be
362 * sent when the lock is released. The last command is overwritten
363 * if gtp_cmd points to a non-empty string. cmd is a single word;
364 * args has all arguments and is empty or has a trailing \n */
365 static void
366 update_cmd(struct board *b, char *cmd, char *args)
368 assert(gtp_cmd);
369 /* To make sure the slaves are in sync, we ignore the original id
370 * and use the board number plus some random bits as gtp id.
371 * Make sure the new command has a new id otherwise slaves
372 * won't send it. */
373 static int gtp_id = -1;
374 int id;
375 int moves = is_reset(cmd) ? 0 : b->moves;
376 do {
377 /* fast_random() is 16-bit only so the multiplication can't overflow. */
378 id = force_reply(moves + fast_random(65535) * DIST_GAMELEN);
379 } while (id == gtp_id);
380 gtp_id = id;
381 snprintf(gtp_cmd, gtp_cmds + CMDS_SIZE - gtp_cmd, "%d %s %s",
382 id, cmd, *args ? args : "\n");
383 reply_count = final_reply_count = 0;
385 /* Remember history for out-of-sync slaves, at most 3 ids per move
386 * (time_left, genmoves, play). */
387 static int slot = 0;
388 slot = (slot + 1) % MAX_CMDS_PER_MOVE;
389 id_history[moves][slot] = id;
390 cmd_history[moves][slot] = gtp_cmd;
392 // Notify the slave threads about the new command.
393 pthread_cond_broadcast(&cmd_cond);
396 /* Update the command history, then create a new gtp command
397 * for all slaves. The slave lock is held upon entry and
398 * upon return, so the command will actually be sent when the
399 * lock is released. cmd is a single word; args has all
400 * arguments and is empty or has a trailing \n */
401 static void
402 new_cmd(struct board *b, char *cmd, char *args)
404 // Clear the history when a new game starts:
405 if (!gtp_cmd || is_gamestart(cmd)) {
406 gtp_cmd = gtp_cmds;
407 } else {
408 /* Preserve command history for new slaves.
409 * To indicate that the slave should only reply to
410 * the last command we force the id of previous
411 * commands to be just the move number. */
412 int id = prevent_reply(atoi(gtp_cmd));
413 int len = strspn(gtp_cmd, "0123456789");
414 char buf[32];
415 snprintf(buf, sizeof(buf), "%0*d", len, id);
416 memcpy(gtp_cmd, buf, len);
418 gtp_cmd += strlen(gtp_cmd);
421 // Let the slave threads send the new gtp command:
422 update_cmd(b, cmd, args);
425 /* If time_limit > 0, wait until all slaves have replied, or if the
426 * given absolute time is passed, wait for at least one reply.
427 * If time_limit == 0, wait until we get at least min_playouts games
428 * simulated in total by all the slaves, or until all slaves have replied.
429 * The replies are returned in gtp_replies[0..reply_count-1]
430 * slave_lock is held on entry and on return. */
431 static void
432 get_replies(double time_limit, int min_playouts, struct board *b)
434 while (reply_count == 0 || final_reply_count < active_slaves) {
435 if (time_limit && reply_count > 0) {
436 struct timespec ts;
437 double sec;
438 ts.tv_nsec = (int)(modf(time_limit, &sec)*1000000000.0);
439 ts.tv_sec = (int)sec;
440 pthread_cond_timedwait(&reply_cond, &slave_lock, &ts);
441 } else {
442 pthread_cond_wait(&reply_cond, &slave_lock);
444 if (reply_count == 0) continue;
445 if (final_reply_count >= active_slaves) return;
446 if (time_limit) {
447 if (time_now() >= time_limit) break;
448 } else {
449 int playouts, threads;
450 struct move_stats s;
451 select_best_move(b, &s, &playouts, &threads);
452 if (playouts >= min_playouts) return;
455 if (DEBUGL(1)) {
456 char buf[1024];
457 snprintf(buf, sizeof(buf),
458 "get_replies timeout %.3f >= %.3f, final %d, temp %d, active %d\n",
459 time_now() - start_time, time_limit - start_time,
460 final_reply_count, reply_count, active_slaves);
461 logline(NULL, "? ", buf);
463 assert(reply_count > 0 && final_reply_count <= reply_count);
466 /* Maximum time (seconds) to wait for answers to fast gtp commands
467 * (all commands except pachi-genmoves and final_status_list). */
468 #define MAX_FAST_CMD_WAIT 1.0
470 /* Dispatch a new gtp command to all slaves.
471 * The slave lock must not be held upon entry and is released upon return.
472 * args is empty or ends with '\n' */
473 static enum parse_code
474 distributed_notify(struct engine *e, struct board *b, int id, char *cmd, char *args, char **reply)
476 struct distributed *dist = e->data;
478 /* Commands that should not be sent to slaves */
479 if ((!strcasecmp(cmd, "quit") && !dist->slaves_quit)
480 || !strcasecmp(cmd, "uct_genbook")
481 || !strcasecmp(cmd, "uct_dumpbook")
482 || !strcasecmp(cmd, "kgs-chat")
484 /* and commands that will be sent to slaves later */
485 || !strcasecmp(cmd, "genmove")
486 || !strcasecmp(cmd, "kgs-genmove_cleanup")
487 || !strcasecmp(cmd, "final_score")
488 || !strcasecmp(cmd, "final_status_list"))
489 return P_OK;
491 pthread_mutex_lock(&slave_lock);
493 // Create a new command to be sent by the slave threads.
494 new_cmd(b, cmd, args);
496 /* Wait for replies here. If we don't wait, we run the
497 * risk of getting out of sync with most slaves and
498 * sending command history too frequently. */
499 get_replies(time_now() + MAX_FAST_CMD_WAIT, 0, b);
501 pthread_mutex_unlock(&slave_lock);
502 return P_OK;
505 /* pachi-genmoves returns a line "=id total_playouts threads[ reserved]" then a list of lines
506 * "coord playouts value". Keep this function in sync with uct_notify().
507 * Return the move with most playouts, its average value, and stats for debugging.
508 * slave_lock is held on entry and on return. */
509 static coord_t
510 select_best_move(struct board *b, struct move_stats *best_stats,
511 int *total_playouts, int *total_threads)
513 assert(reply_count > 0);
515 /* +2 for pass and resign. */
516 struct move_stats *stats = alloca((board_size2(b)+2) * sizeof(struct move_stats));
517 memset(stats, 0, (board_size2(b)+2) * sizeof(*stats));
518 stats += 2;
520 coord_t best_move = pass;
521 int best_playouts = -1;
522 *total_playouts = *total_threads = 0;
524 for (int reply = 0; reply < reply_count; reply++) {
525 char *r = gtp_replies[reply];
526 int id, playouts, threads;
527 if (sscanf(r, "=%d %d %d", &id, &playouts, &threads) != 3) continue;
528 *total_playouts += playouts;
529 *total_threads += threads;
530 // Skip the rest of the firt line if any (allow future extensions)
531 r = strchr(r, '\n');
533 char move[64];
534 struct move_stats s;
535 while (r && sscanf(++r, "%63s %d %f", move, &s.playouts, &s.value) == 3) {
536 coord_t *c = str2coord(move, board_size(b));
537 stats_add_result(&stats[*c], s.value, s.playouts);
538 if (stats[*c].playouts > best_playouts) {
539 best_playouts = stats[*c].playouts;
540 best_move = *c;
542 coord_done(c);
543 r = strchr(r, '\n');
546 *best_stats = stats[best_move];
547 return best_move;
550 /* Time control is mostly done by the slaves, so we use default values here. */
551 #define FUSEKI_END 20
552 #define YOSE_START 40
554 static coord_t *
555 distributed_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive)
557 struct distributed *dist = e->data;
558 double start = time_now();
560 long time_limit = 0;
561 int min_playouts = 0;
563 char *cmd = pass_all_alive ? "pachi-genmoves_cleanup" : "pachi-genmoves";
564 char args[128];
566 if (ti->period == TT_NULL) *ti = default_ti;
567 struct time_stop stop;
568 time_stop_conditions(ti, b, FUSEKI_END, YOSE_START, &stop);
570 if (ti->dim == TD_WALLTIME) {
571 time_limit = ti->len.t.timer_start + stop.worst.time;
573 /* Send time info to the slaves to make sure they all
574 * reply in time, particularly if they were out of sync
575 * and there are no time_left commands. We cannot send
576 * the absolute time limit because slaves may have a
577 * different system time.
578 * Keep this code in sync with gtp_parse(). */
579 snprintf(args, sizeof(args), "%s %.3f %.3f %d %d\n",
580 stone2str(color), ti->len.t.main_time,
581 ti->len.t.byoyomi_time, ti->len.t.byoyomi_periods,
582 ti->len.t.byoyomi_stones);
583 } else {
584 min_playouts = stop.desired.playouts;
586 /* For absolute number of simulations, slaves still
587 * use their own -t =NUM parameter. (The master
588 * needs to know the total number of simulations over
589 * all slaves so it has a different -t parameter.) */
590 snprintf(args, sizeof(args), "%s\n", stone2str(color));
593 pthread_mutex_lock(&slave_lock);
594 new_cmd(b, cmd, args);
596 get_replies(time_limit, min_playouts, b);
597 int replies = reply_count;
599 int playouts, threads;
600 dist->my_last_move.color = color;
601 dist->my_last_move.coord = select_best_move(b, &dist->my_last_stats, &playouts, &threads);
603 /* Tell the slaves to commit to the selected move, overwriting
604 * the last "pachi-genmoves" in the command history. */
605 char *coord = coord2str(dist->my_last_move.coord, b);
606 snprintf(args, sizeof(args), "%s %s\n", stone2str(color), coord);
607 update_cmd(b, "play", args);
608 pthread_mutex_unlock(&slave_lock);
610 if (DEBUGL(1)) {
611 char buf[BSIZE];
612 enum stone color = dist->my_last_move.color;
613 double time = time_now() - start + 0.000001; /* avoid divide by zero */
614 snprintf(buf, sizeof(buf),
615 "GLOBAL WINNER is %s %s with score %1.4f (%d/%d games)\n"
616 "genmove in %0.2fs %d slaves %d threads (%d games/s,"
617 " %d games/s/slave, %d games/s/thread)\n",
618 stone2str(color), coord, get_value(dist->my_last_stats.value, color),
619 dist->my_last_stats.playouts, playouts, time, replies, threads,
620 (int)(playouts/time), (int)(playouts/time/replies),
621 (int)(playouts/time/threads));
622 logline(NULL, "* ", buf);
624 free(coord);
625 return coord_copy(dist->my_last_move.coord);
628 static char *
629 distributed_chat(struct engine *e, struct board *b, char *cmd)
631 struct distributed *dist = e->data;
632 static char reply[BSIZE];
634 cmd += strspn(cmd, " \n\t");
635 if (!strncasecmp(cmd, "winrate", 7)) {
636 enum stone color = dist->my_last_move.color;
637 snprintf(reply, BSIZE, "In %d playouts at %d machines, %s %s can win with %.2f%% probability.",
638 dist->my_last_stats.playouts, active_slaves, stone2str(color),
639 coord2sstr(dist->my_last_move.coord, b),
640 100 * get_value(dist->my_last_stats.value, color));
641 return reply;
643 return NULL;
/* qsort() comparison function for an array of strings:
 * p1 and p2 point to the char pointers to compare.
 * The comparison ignores case. */
static int
scmp(const void *p1, const void *p2)
{
	const char *lhs = *(char * const *)p1;
	const char *rhs = *(char * const *)p2;
	return strcasecmp(lhs, rhs);
}
652 static void
653 distributed_dead_group_list(struct engine *e, struct board *b, struct move_queue *mq)
655 pthread_mutex_lock(&slave_lock);
657 new_cmd(b, "final_status_list", "dead\n");
658 get_replies(time_now() + MAX_FAST_CMD_WAIT, 0, b);
660 /* Find the most popular reply. */
661 qsort(gtp_replies, reply_count, sizeof(char *), scmp);
662 int best_reply = 0;
663 int best_count = 1;
664 int count = 1;
665 for (int reply = 1; reply < reply_count; reply++) {
666 if (!strcmp(gtp_replies[reply], gtp_replies[reply-1])) {
667 count++;
668 } else {
669 count = 1;
671 if (count > best_count) {
672 best_count = count;
673 best_reply = reply;
677 /* Pick the first move of each line as group. */
678 char *dead = gtp_replies[best_reply];
679 dead = strchr(dead, ' '); // skip "id "
680 while (dead && *++dead != '\n') {
681 coord_t *c = str2coord(dead, board_size(b));
682 mq_add(mq, *c);
683 coord_done(c);
684 dead = strchr(dead, '\n');
686 pthread_mutex_unlock(&slave_lock);
689 static struct distributed *
690 distributed_state_init(char *arg, struct board *b)
692 struct distributed *dist = calloc(1, sizeof(struct distributed));
694 dist->max_slaves = 100;
695 if (arg) {
696 char *optspec, *next = arg;
697 while (*next) {
698 optspec = next;
699 next += strcspn(next, ",");
700 if (*next) { *next++ = 0; } else { *next = 0; }
702 char *optname = optspec;
703 char *optval = strchr(optspec, '=');
704 if (optval) *optval++ = 0;
706 if (!strcasecmp(optname, "slave_port") && optval) {
707 dist->slave_port = strdup(optval);
708 } else if (!strcasecmp(optname, "proxy_port") && optval) {
709 dist->proxy_port = strdup(optval);
710 } else if (!strcasecmp(optname, "max_slaves") && optval) {
711 dist->max_slaves = atoi(optval);
712 } else if (!strcasecmp(optname, "slaves_quit")) {
713 dist->slaves_quit = !optval || atoi(optval);
714 } else {
715 fprintf(stderr, "distributed: Invalid engine argument %s or missing value\n", optname);
720 gtp_replies = calloc(dist->max_slaves, sizeof(char *));
722 if (!dist->slave_port) {
723 fprintf(stderr, "distributed: missing slave_port\n");
724 exit(1);
726 int slave_sock = port_listen(dist->slave_port, dist->max_slaves);
727 pthread_t thread;
728 for (int id = 0; id < dist->max_slaves; id++) {
729 pthread_create(&thread, NULL, slave_thread, (void *)(long)slave_sock);
732 if (dist->proxy_port) {
733 int proxy_sock = port_listen(dist->proxy_port, dist->max_slaves);
734 for (int id = 0; id < dist->max_slaves; id++) {
735 pthread_create(&thread, NULL, proxy_thread, (void *)(long)proxy_sock);
738 return dist;
741 struct engine *
742 engine_distributed_init(char *arg, struct board *b)
744 start_time = time_now();
745 struct distributed *dist = distributed_state_init(arg, b);
746 struct engine *e = calloc(1, sizeof(struct engine));
747 e->name = "Distributed Engine";
748 e->comment = "I'm playing the distributed engine. When I'm losing, I will resign, "
749 "if I think I win, I play until you pass. "
750 "Anyone can send me 'winrate' in private chat to get my assessment of the position.";
751 e->notify = distributed_notify;
752 e->genmove = distributed_genmove;
753 e->dead_group_list = distributed_dead_group_list;
754 e->chat = distributed_chat;
755 e->data = dist;
756 // Keep the threads and the open socket connections:
757 e->keep_on_clear = true;
759 return e;