1 /* This is a master for the "distributed" engine. It receives connections
2 * from slave machines, sends them gtp commands, then aggregates the
3 * results. It can also act as a proxy for the logs of all slave machines.
4 * The slave machines must run with engine "uct" (not "distributed").
5 * The master sends pachi-genmoves gtp commands regularly to each slave,
6 * gets as replies a list of candidate moves, their number of playouts
7 * and their value. The master then picks the most popular move. */
9 /* With time control, the master waits for all slaves, except
10 * when the allowed time is already passed. In this case the
11 * master picks among the available replies, or waits for just
12 * one reply if there is none yet.
13 * Without time control, the master waits until the desired
14 * number of games have been simulated. In this case the -t
15 * parameter for the master should be the sum of the parameters
18 /* The master sends updated statistics for the best moves
19 * in each genmoves command. In this version only the
20 * children of the root node are updated. The slaves
21 * reply with just their own stats; they remember what was
22 * previously received from or sent to the master, to
23 * distinguish their own contribution from that of other slaves. */
25 /* The master-slave protocol has fault tolerance. If a slave is
26 * out of sync, the master sends it the appropriate command history. */
28 /* Pass me arguments like a=b,c=d,...
29 * Supported arguments:
30 * slave_port=SLAVE_PORT slaves connect to this port; this parameter is mandatory.
31 * max_slaves=MAX_SLAVES default 100
32 * slaves_quit=0|1 quit gtp command also sent to slaves, default false.
33 * proxy_port=PROXY_PORT slaves optionally send their logs to this port.
34 * Warning: with proxy_port, the master stderr mixes the logs of all
35 * machines but you can separate them again:
36 * slave logs: sed -n '/< .*:/s/.*< /< /p' logfile
37 * master logs: perl -0777 -pe 's/<[ <].*:.*\n//g' logfile
40 /* A configuration without proxy would have one master run on masterhost as:
41 * zzgo -e distributed slave_port=1234
42 * and N slaves running as:
43 * zzgo -e uct -g masterhost:1234 slave
45 * zzgo -e distributed slave_port=1234,proxy_port=1235
46 * zzgo -e uct -g masterhost:1234 -l masterhost:1235 slave
47 * If the master itself runs on a machine other than that running gogui,
48 * gogui-twogtp, kgsGtp or cgosGtp, it can redirect its gtp port:
49 * zzgo -e distributed -g 10000 slave_port=1234,proxy_port=1235
61 #include <sys/types.h>
62 #include <sys/socket.h>
63 #include <arpa/inet.h>
77 #include "distributed/distributed.h"
79 /* Internal engine state. */
85 struct move my_last_move
;
86 struct move_stats my_last_stats
;
89 /* Default number of simulations to perform per move.
90 * Note that this is in total over all slaves! */
91 #define DIST_GAMES 80000
92 static const struct time_info default_ti
= {
95 .len
= { .games
= DIST_GAMES
},
/* Return value as seen by color: value itself when color is S_BLACK,
 * and 1 - value for any other color. */
98 #define get_value(value, color) \
99 ((color) == S_BLACK ? (value) : 1 - (value))
101 /* Max size for one line of reply or slave log. */
104 /* Max size of all gtp commands for one game.
105 * 60 chars for the first line of genmoves plus 100 lines
106 * of 30 chars each for the stats at last move. */
107 #define CMDS_SIZE (60*MAX_GAMELEN + 30*100)
109 /* All gtp commands for current game separated by \n */
110 static char gtp_cmds
[CMDS_SIZE
];
112 /* Latest gtp command sent to slaves. */
113 static char *gtp_cmd
= NULL
;
115 /* Slaves send gtp_cmd when cmd_count changes. */
116 static int cmd_count
= 0;
118 /* Remember at most 10 gtp ids per move: kgs-rules, boardsize, clear_board,
119 * time_settings, komi, handicap, genmoves, play pass, play pass, final_status_list */
120 #define MAX_CMDS_PER_MOVE 10
122 /* History of gtp commands sent for current game, indexed by move. */
123 static int id_history
[MAX_GAMELEN
][MAX_CMDS_PER_MOVE
];
124 static char *cmd_history
[MAX_GAMELEN
][MAX_CMDS_PER_MOVE
];
/* Index of the slot following slot, wrapping around to 0 once
 * MAX_CMDS_PER_MOVE is reached. */
126 #define next_slot(slot) (((slot) + 1) % MAX_CMDS_PER_MOVE)
128 /* Number of active slave machines working for this master. */
129 static int active_slaves
= 0;
131 /* Number of replies to last gtp command already received. */
132 static int reply_count
= 0;
134 /* All replies to latest gtp command are in gtp_replies[0..reply_count-1]. */
135 static char **gtp_replies
;
137 /* Mutex protecting gtp_cmds, gtp_cmd, id_history, cmd_history,
138 * cmd_count, active_slaves, reply_count & gtp_replies */
139 static pthread_mutex_t slave_lock
= PTHREAD_MUTEX_INITIALIZER
;
141 /* Condition signaled when a new gtp command is available. */
142 static pthread_cond_t cmd_cond
= PTHREAD_COND_INITIALIZER
;
144 /* Condition signaled when reply_count increases. */
145 static pthread_cond_t reply_cond
= PTHREAD_COND_INITIALIZER
;
147 /* Mutex protecting stderr. Must not be held at same time as slave_lock. */
148 static pthread_mutex_t log_lock
= PTHREAD_MUTEX_INITIALIZER
;
150 /* Absolute time when this program was started.
151 * For debugging only. */
152 static double start_time
;
154 /* Write the time, client address, prefix, and string s to stderr atomically.
155 * s should end with a \n */
157 logline(struct in_addr
*client
, char *prefix
, char *s
)
159 double now
= time_now();
160 char addr
[INET_ADDRSTRLEN
];
162 inet_ntop(AF_INET
, client
, addr
, sizeof(addr
));
166 pthread_mutex_lock(&log_lock
);
167 fprintf(stderr
, "%s%15s %9.3f: %s", prefix
, addr
, now
- start_time
, s
);
168 pthread_mutex_unlock(&log_lock
);
171 /* Thread opening a connection on the given socket and copying input
172 * from there to stderr. */
174 proxy_thread(void *arg
)
176 int proxy_sock
= (long)arg
;
177 assert(proxy_sock
>= 0);
179 struct in_addr client
;
180 int conn
= open_server_connection(proxy_sock
, &client
);
181 FILE *f
= fdopen(conn
, "r");
183 while (fgets(buf
, BSIZE
, f
)) {
184 logline(&client
, "< ", buf
);
190 /* Get a reply to one gtp command. Return the gtp command id,
191 * or -1 if error. reply must have at least CMDS_SIZE bytes.
192 * slave_lock is not held on either entry or exit of this function. */
194 get_reply(FILE *f
, struct in_addr client
, char *reply
)
199 while (fgets(line
, reply
+ CMDS_SIZE
- line
, f
) && *line
!= '\n') {
200 if (DEBUGL(3) || (DEBUGL(2) && line
== reply
))
201 logline(&client
, "<<", line
);
202 if (reply_id
< 0 && (*line
== '=' || *line
== '?') && isdigit(line
[1]))
203 reply_id
= atoi(line
+1);
204 line
+= strlen(line
);
206 if (*line
!= '\n') return -1;
210 /* Send one gtp command and get a reply from the slave machine.
211 * Write the reply in buf which must have at least CMDS_SIZE bytes.
212 * Return the gtp command id, or -1 if error.
213 * slave_lock is held on both entry and exit of this function. */
215 send_command(char *to_send
, FILE *f
, struct in_addr client
, char *buf
)
217 assert(to_send
&& gtp_cmd
);
218 strncpy(buf
, to_send
, CMDS_SIZE
);
219 bool resend
= to_send
!= gtp_cmd
;
221 pthread_mutex_unlock(&slave_lock
);
223 if (DEBUGL(1) && resend
)
224 logline(&client
, "? ",
225 to_send
== gtp_cmds
? "resend all\n" : "partial resend\n");
230 char *s
= strchr(buf
, '\n');
233 logline(&client
, ">>", buf
);
236 int reply_id
= get_reply(f
, client
, buf
);
238 pthread_mutex_lock(&slave_lock
);
242 /* Process the reply received from a slave machine.
243 * Copy it to reply_buf and return NULL if ok, or return
244 * the command to be sent again if the slave is out of sync.
245 * slave_lock is held on both entry and exit of this function. */
247 process_reply(int reply_id
, char *reply
, char *reply_buf
,
248 int *last_reply_id
, int *reply_slot
)
250 /* Make sure we are still in sync. cmd_count may have
251 * changed but the reply is valid as long as cmd_id didn't
252 * change (this only occurs for consecutive genmoves). */
253 int cmd_id
= atoi(gtp_cmd
);
254 if (reply_id
== cmd_id
&& *reply
== '=') {
255 strncpy(reply_buf
, reply
, CMDS_SIZE
);
256 if (reply_id
!= *last_reply_id
)
257 *reply_slot
= reply_count
++;
258 gtp_replies
[*reply_slot
] = reply_buf
;
259 *last_reply_id
= reply_id
;
261 pthread_cond_signal(&reply_cond
);
264 /* Resend everything if slave got latest command,
265 * but doesn't have a correct board. */
266 if (reply_id
== cmd_id
) return gtp_cmds
;
268 /* The slave is out-of-sync. Check whether the last command
269 * it received belongs to the current game. If so resend
270 * starting at the last move known by slave, otherwise
271 * resend the whole history. */
272 int reply_move
= move_number(reply_id
);
273 if (reply_move
> move_number(cmd_id
)) return gtp_cmds
;
276 for (slot
= 0; slot
< MAX_CMDS_PER_MOVE
; slot
++) {
277 if (reply_id
== id_history
[reply_move
][slot
]) break;
279 if (slot
== MAX_CMDS_PER_MOVE
) return gtp_cmds
;
281 char *to_send
= cmd_history
[reply_move
][slot
];
283 /* Do not resend the same command if the slave did it
284 * successfully and this was the last command at the
285 * same move number. This avoids two "play" commands
286 * in a row which forces the slave to clear the tree. */
287 if (*reply
!= '=') return to_send
;
288 char *next
= strchr(to_send
, '\n');
290 if (!next
[1]) return to_send
;
292 /* If play has overwritten genmoves, send play. */
293 if (cmd_history
[reply_move
][next_slot(slot
)] == to_send
)
296 /* At this point we know that there was no overwrite,
297 * and that the slave got the latest command at this move number.
298 * This command cannot be a genmoves so it has a single line.
299 * It is safe to send the next command. */
301 assert(cmd_history
[reply_move
][next_slot(slot
)] == next
302 || cmd_history
[reply_move
+1][next_slot(slot
)] == next
);
306 /* Main loop of a slave thread.
307 * Send the current command to the slave machine and wait for a reply.
308 * Resend command history if the slave machine is out of sync.
309 * Returns when the connection with the slave machine is cut.
310 * slave_lock is held on both entry and exit of this function. */
312 slave_loop(FILE *f
, struct in_addr client
, char *reply_buf
, bool resend
)
314 char *to_send
= gtp_cmd
;
315 int last_cmd_sent
= 0;
316 int last_reply_id
= -1;
319 while (last_cmd_sent
== cmd_count
&& !resend
) {
320 // Wait for a new gtp command.
321 pthread_cond_wait(&cmd_cond
, &slave_lock
);
325 /* Command available, send it to slave machine.
326 * If slave was out of sync, send the history. */
328 last_cmd_sent
= cmd_count
;
330 /* Send the command and get the reply, which always ends with \n\n
331 * The slave machine sends "=id reply" or "?id reply"
332 * with id == cmd_id if it is in sync. */
333 int reply_id
= send_command(to_send
, f
, client
, buf
);
334 if (reply_id
== -1) return;
336 to_send
= process_reply(reply_id
, buf
, reply_buf
,
337 &last_reply_id
, &reply_slot
);
339 /* Good reply. Force waiting for a new command.
340 * The next genmoves stats we send must include those
341 * just received (this is assumed by the slave). */
342 last_cmd_sent
= cmd_count
;
350 /* Thread sending gtp commands to one slave machine, and
351 * reading replies. If a slave machine dies, this thread waits
352 * for a connection from another slave. */
354 slave_thread(void *arg
)
356 int slave_sock
= (long)arg
;
357 assert(slave_sock
>= 0);
358 char reply_buf
[CMDS_SIZE
];
362 /* Wait for a connection from any slave. */
363 struct in_addr client
;
364 int conn
= open_server_connection(slave_sock
, &client
);
366 FILE *f
= fdopen(conn
, "r+");
368 logline(&client
, "= ", "new slave\n");
370 /* Minimal check of the slave identity. */
372 if (!fgets(reply_buf
, sizeof(reply_buf
), f
)
373 || strncasecmp(reply_buf
, "= Pachi", 7)
374 || !fgets(reply_buf
, sizeof(reply_buf
), f
)
375 || strcmp(reply_buf
, "\n")) {
376 logline(&client
, "? ", "bad slave\n");
381 pthread_mutex_lock(&slave_lock
);
383 slave_loop(f
, client
, reply_buf
, resend
);
385 assert(active_slaves
> 0);
387 // Unblock main thread if it was waiting for this slave.
388 pthread_cond_signal(&reply_cond
);
389 pthread_mutex_unlock(&slave_lock
);
393 logline(&client
, "= ", "lost slave\n");
398 /* Create a new gtp command for all slaves. The slave lock is held
399 * upon entry and upon return, so the command will actually be
400 * sent when the lock is released. The last command is overwritten
401 * if gtp_cmd points to a non-empty string. cmd is a single word;
402 * args has all arguments and is empty or has a trailing \n */
404 update_cmd(struct board
*b
, char *cmd
, char *args
, bool new_id
)
407 /* To make sure the slaves are in sync, we ignore the original id
408 * and use the board number plus some random bits as gtp id. */
409 static int gtp_id
= -1;
410 int moves
= is_reset(cmd
) ? 0 : b
->moves
;
412 /* fast_random() is 16-bit only so the multiplication can't overflow. */
413 gtp_id
= force_reply(moves
+ fast_random(65535) * DIST_GAMELEN
);
416 snprintf(gtp_cmd
, gtp_cmds
+ CMDS_SIZE
- gtp_cmd
, "%d %s %s",
417 gtp_id
, cmd
, *args
? args
: "\n");
420 /* Remember history for out-of-sync slaves. */
423 slot
= next_slot(slot
);
424 id_history
[moves
][slot
] = gtp_id
;
425 cmd_history
[moves
][slot
] = gtp_cmd
;
427 // Notify the slave threads about the new command.
428 pthread_cond_broadcast(&cmd_cond
);
431 /* Update the command history, then create a new gtp command
432 * for all slaves. The slave lock is held upon entry and
433 * upon return, so the command will actually be sent when the
434 * lock is released. cmd is a single word; args has all
435 * arguments and is empty or has a trailing \n */
437 new_cmd(struct board
*b
, char *cmd
, char *args
)
439 // Clear the history when a new game starts:
440 if (!gtp_cmd
|| is_gamestart(cmd
)) {
442 memset(cmd_history
, 0, sizeof(cmd_history
));
444 /* Preserve command history for new slaves.
445 * To indicate that the slave should only reply to
446 * the last command we force the id of previous
447 * commands to be just the move number. */
448 int id
= prevent_reply(atoi(gtp_cmd
));
449 int len
= strspn(gtp_cmd
, "0123456789");
451 snprintf(buf
, sizeof(buf
), "%0*d", len
, id
);
452 memcpy(gtp_cmd
, buf
, len
);
454 gtp_cmd
+= strlen(gtp_cmd
);
457 // Let the slave threads send the new gtp command:
458 update_cmd(b
, cmd
, args
, true);
461 /* Wait for at least one new reply. Return when all slaves have
462 * replied, or when the given absolute time is passed.
463 * The replies are returned in gtp_replies[0..reply_count-1]
464 * slave_lock is held on entry and on return. */
466 get_replies(double time_limit
)
469 if (reply_count
> 0) {
472 ts
.tv_nsec
= (int)(modf(time_limit
, &sec
)*1000000000.0);
473 ts
.tv_sec
= (int)sec
;
474 pthread_cond_timedwait(&reply_cond
, &slave_lock
, &ts
);
476 pthread_cond_wait(&reply_cond
, &slave_lock
);
478 if (reply_count
== 0) continue;
479 if (reply_count
>= active_slaves
) return;
480 if (time_now() >= time_limit
) break;
484 snprintf(buf
, sizeof(buf
),
485 "get_replies timeout %.3f >= %.3f, replies %d < active %d\n",
486 time_now() - start_time
, time_limit
- start_time
,
487 reply_count
, active_slaves
);
488 logline(NULL
, "? ", buf
);
490 assert(reply_count
> 0);
493 /* Maximum time (seconds) to wait for answers to fast gtp commands
494 * (all commands except pachi-genmoves and final_status_list). */
495 #define MAX_FAST_CMD_WAIT 1.0
497 /* How often to send a stats update to slaves (seconds) */
498 #define STATS_UPDATE_INTERVAL 0.1 /* 100ms */
500 /* NOTE(review): this repeats the MAX_FAST_CMD_WAIT definition above with the
501 * same value; the wait between genmoves updates uses STATS_UPDATE_INTERVAL. */
502 #define MAX_FAST_CMD_WAIT 1.0
504 /* Dispatch a new gtp command to all slaves.
505 * The slave lock must not be held upon entry and is released upon return.
506 * args is empty or ends with '\n' */
507 static enum parse_code
508 distributed_notify(struct engine
*e
, struct board
*b
, int id
, char *cmd
, char *args
, char **reply
)
510 struct distributed
*dist
= e
->data
;
512 /* Commands that should not be sent to slaves.
513 * time_left will be part of next pachi-genmoves,
514 * we reduce latency by not forwarding it here. */
515 if ((!strcasecmp(cmd
, "quit") && !dist
->slaves_quit
)
516 || !strcasecmp(cmd
, "uct_genbook")
517 || !strcasecmp(cmd
, "uct_dumpbook")
518 || !strcasecmp(cmd
, "kgs-chat")
519 || !strcasecmp(cmd
, "time_left")
521 /* and commands that will be sent to slaves later */
522 || !strcasecmp(cmd
, "genmove")
523 || !strcasecmp(cmd
, "kgs-genmove_cleanup")
524 || !strcasecmp(cmd
, "final_score")
525 || !strcasecmp(cmd
, "final_status_list"))
528 pthread_mutex_lock(&slave_lock
);
530 // Create a new command to be sent by the slave threads.
531 new_cmd(b
, cmd
, args
);
533 /* Wait for replies here. If we don't wait, we run the
534 * risk of getting out of sync with most slaves and
535 * sending command history too frequently. */
536 get_replies(time_now() + MAX_FAST_CMD_WAIT
);
538 pthread_mutex_unlock(&slave_lock
);
542 /* genmoves returns a line "=id played_own total_playouts threads keep_looking[ reserved]"
543 * then a list of lines "coord playouts value amaf_playouts amaf_value".
544 * Return the move with most playouts, and additional stats.
545 * Keep this code in sync with uct/slave.c:report_stats().
546 * slave_lock is held on entry and on return. */
548 select_best_move(struct board
*b
, struct move_stats2
*stats
, int *played
,
549 int *total_playouts
, int *total_threads
, bool *keep_looking
)
551 assert(reply_count
> 0);
553 /* +2 for pass and resign */
554 memset(stats
-2, 0, (board_size2(b
)+2) * sizeof(*stats
));
556 coord_t best_move
= pass
;
557 int best_playouts
= -1;
563 for (int reply
= 0; reply
< reply_count
; reply
++) {
564 char *r
= gtp_replies
[reply
];
566 if (sscanf(r
, "=%d %d %d %d %d", &id
, &o
, &p
, &t
, &k
) != 5) continue;
568 *total_playouts
+= p
;
571 // Skip the rest of the first line if any (allow future extensions)
575 struct move_stats2 s
;
576 while (r
&& sscanf(++r
, "%63s %d %f %d %f", move
, &s
.u
.playouts
,
577 &s
.u
.value
, &s
.amaf
.playouts
, &s
.amaf
.value
) == 5) {
578 coord_t
*c
= str2coord(move
, board_size(b
));
579 stats_add_result(&stats
[*c
].u
, s
.u
.value
, s
.u
.playouts
);
580 stats_add_result(&stats
[*c
].amaf
, s
.amaf
.value
, s
.amaf
.playouts
);
582 if (stats
[*c
].u
.playouts
> best_playouts
) {
583 best_playouts
= stats
[*c
].u
.playouts
;
590 *keep_looking
= keep
> reply_count
/ 2;
594 /* Set the args for the genmoves command. If stats is not null,
595 * append the stats from all slaves above min_playouts, except
596 * for pass and resign. args must have CMDS_SIZE bytes and
597 * upon return ends with an empty line.
598 * Keep this code in sync with uct_genmoves().
599 * slave_lock is held on entry and on return. */
601 genmoves_args(char *args
, struct board
*b
, enum stone color
, int played
,
602 struct time_info
*ti
, struct move_stats2
*stats
, int min_playouts
)
604 char *end
= args
+ CMDS_SIZE
;
605 char *s
= args
+ snprintf(args
, CMDS_SIZE
, "%s %d", stone2str(color
), played
);
607 if (ti
->dim
== TD_WALLTIME
) {
608 s
+= snprintf(s
, end
- s
, " %.3f %.3f %d %d",
609 ti
->len
.t
.main_time
, ti
->len
.t
.byoyomi_time
,
610 ti
->len
.t
.byoyomi_periods
, ti
->len
.t
.byoyomi_stones
);
612 s
+= snprintf(s
, end
- s
, "\n");
615 if (stats
[c
].u
.playouts
<= min_playouts
) continue;
616 s
+= snprintf(s
, end
- s
, "%s %d %.7f %d %.7f\n",
618 stats
[c
].u
.playouts
, stats
[c
].u
.value
,
619 stats
[c
].amaf
.playouts
, stats
[c
].amaf
.value
);
622 s
+= snprintf(s
, end
- s
, "\n");
625 /* Time control is mostly done by the slaves, so we use default values here. */
626 #define FUSEKI_END 20
627 #define YOSE_START 40
630 distributed_genmove(struct engine
*e
, struct board
*b
, struct time_info
*ti
,
631 enum stone color
, bool pass_all_alive
)
633 struct distributed
*dist
= e
->data
;
634 double now
= time_now();
637 char *cmd
= pass_all_alive
? "pachi-genmoves_cleanup" : "pachi-genmoves";
638 char args
[CMDS_SIZE
];
641 int played
, playouts
, threads
;
643 if (ti
->period
== TT_NULL
) *ti
= default_ti
;
644 struct time_stop stop
;
645 time_stop_conditions(ti
, b
, FUSEKI_END
, YOSE_START
, &stop
);
646 struct time_info saved_ti
= *ti
;
648 /* Send the first genmoves without stats. */
649 genmoves_args(args
, b
, color
, 0, ti
, NULL
, 0);
651 /* Combined move stats from all slaves, only for children
652 * of the root node, plus 2 for pass and resign. */
653 struct move_stats2
*stats
= alloca((board_size2(b
)+2) * sizeof(struct move_stats2
));
656 pthread_mutex_lock(&slave_lock
);
657 new_cmd(b
, cmd
, args
);
659 /* Loop until most slaves want to quit or time elapsed. */
662 get_replies(now
+ STATS_UPDATE_INTERVAL
);
664 if (ti
->dim
== TD_WALLTIME
)
665 time_sub(ti
, now
- start
);
668 best
= select_best_move(b
, stats
, &played
, &playouts
, &threads
, &keep_looking
);
670 if (!keep_looking
) break;
671 if (ti
->dim
== TD_WALLTIME
) {
672 if (now
- ti
->len
.t
.timer_start
>= stop
.worst
.time
) break;
674 if (played
>= stop
.worst
.playouts
) break;
678 char *coord
= coord2sstr(best
, b
);
679 snprintf(buf
, sizeof(buf
),
680 "temp winner is %s %s with score %1.4f (%d/%d games)"
681 " %d slaves %d threads\n",
682 stone2str(color
), coord
, get_value(stats
[best
].u
.value
, color
),
683 stats
[best
].u
.playouts
, playouts
, reply_count
, threads
);
684 logline(NULL
, "* ", buf
);
686 /* Send the command with the same gtp id, to avoid discarding
687 * a reply to a previous genmoves at the same move. */
688 genmoves_args(args
, b
, color
, played
, ti
, stats
, stats
[best
].u
.playouts
/ 100);
689 update_cmd(b
, cmd
, args
, false);
691 int replies
= reply_count
;
693 /* Do not subtract time spent twice (see gtp_parse). */
696 dist
->my_last_move
.color
= color
;
697 dist
->my_last_move
.coord
= best
;
698 dist
->my_last_stats
= stats
[best
].u
;
700 /* Tell the slaves to commit to the selected move, overwriting
701 * the last "pachi-genmoves" in the command history. */
702 char *coord
= coord2str(best
, b
);
703 snprintf(args
, sizeof(args
), "%s %s\n", stone2str(color
), coord
);
704 update_cmd(b
, "play", args
, true);
705 pthread_mutex_unlock(&slave_lock
);
709 double time
= now
- first
+ 0.000001; /* avoid divide by zero */
710 snprintf(buf
, sizeof(buf
),
711 "GLOBAL WINNER is %s %s with score %1.4f (%d/%d games)\n"
712 "genmove %d games in %0.2fs %d slaves %d threads (%d games/s,"
713 " %d games/s/slave, %d games/s/thread)\n",
714 stone2str(color
), coord
, get_value(stats
[best
].u
.value
, color
),
715 stats
[best
].u
.playouts
, playouts
, played
, time
, replies
, threads
,
716 (int)(played
/time
), (int)(played
/time
/replies
),
717 (int)(played
/time
/threads
));
718 logline(NULL
, "* ", buf
);
721 return coord_copy(best
);
725 distributed_chat(struct engine
*e
, struct board
*b
, char *cmd
)
727 struct distributed
*dist
= e
->data
;
728 static char reply
[BSIZE
];
730 cmd
+= strspn(cmd
, " \n\t");
731 if (!strncasecmp(cmd
, "winrate", 7)) {
732 enum stone color
= dist
->my_last_move
.color
;
733 snprintf(reply
, BSIZE
, "In %d playouts at %d machines, %s %s can win with %.2f%% probability.",
734 dist
->my_last_stats
.playouts
, active_slaves
, stone2str(color
),
735 coord2sstr(dist
->my_last_move
.coord
, b
),
736 100 * get_value(dist
->my_last_stats
.value
, color
));
743 scmp(const void *p1
, const void *p2
)
745 return strcasecmp(*(char * const *)p1
, *(char * const *)p2
);
749 distributed_dead_group_list(struct engine
*e
, struct board
*b
, struct move_queue
*mq
)
751 pthread_mutex_lock(&slave_lock
);
753 new_cmd(b
, "final_status_list", "dead\n");
754 get_replies(time_now() + MAX_FAST_CMD_WAIT
);
756 /* Find the most popular reply. */
757 qsort(gtp_replies
, reply_count
, sizeof(char *), scmp
);
761 for (int reply
= 1; reply
< reply_count
; reply
++) {
762 if (!strcmp(gtp_replies
[reply
], gtp_replies
[reply
-1])) {
767 if (count
> best_count
) {
773 /* Pick the first move of each line as group. */
774 char *dead
= gtp_replies
[best_reply
];
775 dead
= strchr(dead
, ' '); // skip "id "
776 while (dead
&& *++dead
!= '\n') {
777 coord_t
*c
= str2coord(dead
, board_size(b
));
780 dead
= strchr(dead
, '\n');
782 pthread_mutex_unlock(&slave_lock
);
785 static struct distributed
*
786 distributed_state_init(char *arg
, struct board
*b
)
788 struct distributed
*dist
= calloc2(1, sizeof(struct distributed
));
790 dist
->max_slaves
= 100;
792 char *optspec
, *next
= arg
;
795 next
+= strcspn(next
, ",");
796 if (*next
) { *next
++ = 0; } else { *next
= 0; }
798 char *optname
= optspec
;
799 char *optval
= strchr(optspec
, '=');
800 if (optval
) *optval
++ = 0;
802 if (!strcasecmp(optname
, "slave_port") && optval
) {
803 dist
->slave_port
= strdup(optval
);
804 } else if (!strcasecmp(optname
, "proxy_port") && optval
) {
805 dist
->proxy_port
= strdup(optval
);
806 } else if (!strcasecmp(optname
, "max_slaves") && optval
) {
807 dist
->max_slaves
= atoi(optval
);
808 } else if (!strcasecmp(optname
, "slaves_quit")) {
809 dist
->slaves_quit
= !optval
|| atoi(optval
);
811 fprintf(stderr
, "distributed: Invalid engine argument %s or missing value\n", optname
);
816 gtp_replies
= calloc2(dist
->max_slaves
, sizeof(char *));
818 if (!dist
->slave_port
) {
819 fprintf(stderr
, "distributed: missing slave_port\n");
822 int slave_sock
= port_listen(dist
->slave_port
, dist
->max_slaves
);
824 for (int id
= 0; id
< dist
->max_slaves
; id
++) {
825 pthread_create(&thread
, NULL
, slave_thread
, (void *)(long)slave_sock
);
828 if (dist
->proxy_port
) {
829 int proxy_sock
= port_listen(dist
->proxy_port
, dist
->max_slaves
);
830 for (int id
= 0; id
< dist
->max_slaves
; id
++) {
831 pthread_create(&thread
, NULL
, proxy_thread
, (void *)(long)proxy_sock
);
838 engine_distributed_init(char *arg
, struct board
*b
)
840 start_time
= time_now();
841 struct distributed
*dist
= distributed_state_init(arg
, b
);
842 struct engine
*e
= calloc2(1, sizeof(struct engine
));
843 e
->name
= "Distributed Engine";
844 e
->comment
= "I'm playing the distributed engine. When I'm losing, I will resign, "
845 "if I think I win, I play until you pass. "
846 "Anyone can send me 'winrate' in private chat to get my assessment of the position.";
847 e
->notify
= distributed_notify
;
848 e
->genmove
= distributed_genmove
;
849 e
->dead_group_list
= distributed_dead_group_list
;
850 e
->chat
= distributed_chat
;
852 // Keep the threads and the open socket connections:
853 e
->keep_on_clear
= true;