Distributed mode: no time limit by default on slaves, control on master only
[pachi.git] / uct / search.c
blobf29cd853b2572c21500fa169e7217a2489246f20
1 #include <assert.h>
2 #include <limits.h>
3 #include <math.h>
4 #include <pthread.h>
5 #include <signal.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <time.h>
11 #define DEBUG
13 #include "debug.h"
14 #include "distributed/distributed.h"
15 #include "move.h"
16 #include "random.h"
17 #include "timeinfo.h"
18 #include "uct/dynkomi.h"
19 #include "uct/internal.h"
20 #include "uct/search.h"
21 #include "uct/tree.h"
22 #include "uct/uct.h"
23 #include "uct/walk.h"
26 /* Default time settings for the UCT engine. In distributed mode, slaves are
27 * unlimited by default and all control is done on the master, either in time
28 * or with total number of playouts over all slaves. (It is also possible but
29 * not recommended to limit only the slaves; the master then decides the move
30 * when a majority of slaves have made their choice.) */
31 static struct time_info default_ti;
32 static __attribute__((constructor)) void
33 default_ti_init(void)
35 time_parse(&default_ti, "15");
38 static const struct time_info unlimited_ti = {
39 .period = TT_MOVE,
40 .dim = TD_GAMES,
41 .len = { .games = INT_MAX },
44 /* When terminating UCT search early, the safety margin to add to the
45 * remaining playout number estimate when deciding whether the result can
46 * still change. */
47 #define PLAYOUT_DELTA_SAFEMARGIN 1000
49 /* Minimal number of simulations to consider early break. */
50 #define PLAYOUT_EARLY_BREAK_MIN 5000
52 /* Minimal time to consider early break (in seconds). */
53 #define TIME_EARLY_BREAK_MIN 1.0
56 /* Pachi threading structure:
58 * main thread
59 * | main(), GTP communication, ...
60 * | starts and stops the search managed by thread_manager
61 * |
62 * thread_manager
63 * | spawns and collects worker threads
64 * |
65 * worker0
66 * worker1
67 * ...
68 * workerK
69 * uct_playouts() loop, doing descend-playout until uct_halt
71 * Another way to look at it is by functions (lines denote thread boundaries):
73 * | uct_genmove()
74 * | uct_search() (uct_search_start() .. uct_search_stop())
75 * | -----------------------
76 * | spawn_thread_manager()
77 * | -----------------------
78 * | spawn_worker()
79 * V uct_playouts() */
81 /* Set in thread manager in case the workers should stop. */
82 volatile sig_atomic_t uct_halt = 0;
83 /* ID of the thread manager. */
84 static pthread_t thread_manager;
85 bool thread_manager_running;
87 static pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER;
88 static pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER;
89 static volatile int finish_thread;
90 static pthread_mutex_t finish_serializer = PTHREAD_MUTEX_INITIALIZER;
92 static void *
93 spawn_worker(void *ctx_)
95 struct uct_thread_ctx *ctx = ctx_;
96 /* Setup */
97 fast_srandom(ctx->seed);
98 /* Run */
99 ctx->games = uct_playouts(ctx->u, ctx->b, ctx->color, ctx->t, ctx->ti);
100 /* Finish */
101 pthread_mutex_lock(&finish_serializer);
102 pthread_mutex_lock(&finish_mutex);
103 finish_thread = ctx->tid;
104 pthread_cond_signal(&finish_cond);
105 pthread_mutex_unlock(&finish_mutex);
106 return ctx;
109 /* Thread manager, controlling worker threads. It must be called with
110 * finish_mutex lock held, but it will unlock it itself before exiting;
111 * this is necessary to be completely deadlock-free. */
112 /* The finish_cond can be signalled for it to stop; in that case,
113 * the caller should set finish_thread = -1. */
114 /* After it is started, it will update mctx->t to point at some tree
115 * used for the actual search, on return
116 * it will set mctx->games to the number of performed simulations. */
117 static void *
118 spawn_thread_manager(void *ctx_)
120 /* In thread_manager, we use only some of the ctx fields. */
121 struct uct_thread_ctx *mctx = ctx_;
122 struct uct *u = mctx->u;
123 struct tree *t = mctx->t;
124 fast_srandom(mctx->seed);
126 int played_games = 0;
127 pthread_t threads[u->threads];
128 int joined = 0;
130 uct_halt = 0;
132 /* Garbage collect the tree by preference when pondering. */
133 if (u->pondering && t->nodes && t->nodes_size >= t->pruning_threshold) {
134 t->root = tree_garbage_collect(t, t->root);
137 /* Spawn threads... */
138 for (int ti = 0; ti < u->threads; ti++) {
139 struct uct_thread_ctx *ctx = malloc2(sizeof(*ctx));
140 ctx->u = u; ctx->b = mctx->b; ctx->color = mctx->color;
141 mctx->t = ctx->t = t;
142 ctx->tid = ti; ctx->seed = fast_random(65536) + ti;
143 ctx->ti = mctx->ti;
144 pthread_attr_t a;
145 pthread_attr_init(&a);
146 pthread_attr_setstacksize(&a, 1048576);
147 pthread_create(&threads[ti], &a, spawn_worker, ctx);
148 if (UDEBUGL(3))
149 fprintf(stderr, "Spawned worker %d\n", ti);
152 /* ...and collect them back: */
153 while (joined < u->threads) {
154 /* Wait for some thread to finish... */
155 pthread_cond_wait(&finish_cond, &finish_mutex);
156 if (finish_thread < 0) {
157 /* Stop-by-caller. Tell the workers to wrap up
158 * and unblock them from terminating. */
159 uct_halt = 1;
160 /* We need to make sure the workers do not complete
161 * the termination sequence before we get officially
162 * stopped - their wake and the stop wake could get
163 * coalesced. */
164 pthread_mutex_unlock(&finish_serializer);
165 continue;
167 /* ...and gather its remnants. */
168 struct uct_thread_ctx *ctx;
169 pthread_join(threads[finish_thread], (void **) &ctx);
170 played_games += ctx->games;
171 joined++;
172 free(ctx);
173 if (UDEBUGL(3))
174 fprintf(stderr, "Joined worker %d\n", finish_thread);
175 pthread_mutex_unlock(&finish_serializer);
178 pthread_mutex_unlock(&finish_mutex);
180 mctx->games = played_games;
181 return mctx;
185 /*** THREAD MANAGER end */
187 /*** Search infrastructure: */
191 uct_search_games(struct uct_search_state *s)
193 return s->ctx->t->root->u.playouts;
196 void
197 uct_search_start(struct uct *u, struct board *b, enum stone color,
198 struct tree *t, struct time_info *ti,
199 struct uct_search_state *s)
201 /* Set up search state. */
202 s->base_playouts = s->last_dynkomi = s->last_print = t->root->u.playouts;
203 s->print_interval = u->reportfreq * u->threads;
204 s->fullmem = false;
206 if (ti) {
207 if (ti->period == TT_NULL) {
208 if (u->slave) {
209 *ti = unlimited_ti;
210 } else {
211 *ti = default_ti;
212 time_start_timer(ti);
215 time_stop_conditions(ti, b, u->fuseki_end, u->yose_start, u->max_maintime_ratio, &s->stop);
218 /* Fire up the tree search thread manager, which will in turn
219 * spawn the searching threads. */
220 assert(u->threads > 0);
221 assert(!thread_manager_running);
222 static struct uct_thread_ctx mctx;
223 mctx = (struct uct_thread_ctx) { .u = u, .b = b, .color = color, .t = t, .seed = fast_random(65536), .ti = ti };
224 s->ctx = &mctx;
225 pthread_mutex_lock(&finish_serializer);
226 pthread_mutex_lock(&finish_mutex);
227 pthread_create(&thread_manager, NULL, spawn_thread_manager, s->ctx);
228 thread_manager_running = true;
231 struct uct_thread_ctx *
232 uct_search_stop(void)
234 assert(thread_manager_running);
236 /* Signal thread manager to stop the workers. */
237 pthread_mutex_lock(&finish_mutex);
238 finish_thread = -1;
239 pthread_cond_signal(&finish_cond);
240 pthread_mutex_unlock(&finish_mutex);
242 /* Collect the thread manager. */
243 struct uct_thread_ctx *pctx;
244 thread_manager_running = false;
245 pthread_join(thread_manager, (void **) &pctx);
246 return pctx;
250 void
251 uct_search_progress(struct uct *u, struct board *b, enum stone color,
252 struct tree *t, struct time_info *ti,
253 struct uct_search_state *s, int i)
255 struct uct_thread_ctx *ctx = s->ctx;
257 /* Adjust dynkomi? */
258 int di = u->dynkomi_interval * u->threads;
259 if (ctx->t->use_extra_komi && u->dynkomi->permove
260 && !u->pondering && di
261 && i > s->last_dynkomi + di) {
262 s->last_dynkomi += di;
263 floating_t old_dynkomi = ctx->t->extra_komi;
264 ctx->t->extra_komi = u->dynkomi->permove(u->dynkomi, b, ctx->t);
265 if (UDEBUGL(3) && old_dynkomi != ctx->t->extra_komi)
266 fprintf(stderr, "dynkomi adjusted (%f -> %f)\n",
267 old_dynkomi, ctx->t->extra_komi);
270 /* Print progress? */
271 if (i - s->last_print > s->print_interval) {
272 s->last_print += s->print_interval; // keep the numbers tidy
273 uct_progress_status(u, ctx->t, color, s->last_print, NULL);
276 if (!s->fullmem && ctx->t->nodes_size > u->max_tree_size) {
277 if (UDEBUGL(2))
278 fprintf(stderr, "memory limit hit (%lu > %lu)\n",
279 ctx->t->nodes_size, u->max_tree_size);
280 s->fullmem = true;
285 /* Determine whether we should terminate the search early. */
286 static bool
287 uct_search_stop_early(struct uct *u, struct tree *t, struct board *b,
288 struct time_info *ti, struct time_stop *stop,
289 struct tree_node *best, struct tree_node *best2,
290 int played, bool fullmem)
292 /* If the memory is full, stop immediately. Since the tree
293 * cannot grow anymore, some non-well-expanded nodes will
294 * quickly take over with extremely high ratio since the
295 * counters are not properly simulated (just as if we use
296 * non-UCT MonteCarlo). */
297 /* (XXX: A proper solution would be to prune the tree
298 * on the spot.) */
299 if (fullmem)
300 return true;
302 /* Think at least 100ms to avoid a random move. This is particularly
303 * important in distributed mode, where this function is called frequently. */
304 double elapsed = 0.0;
305 if (ti->dim == TD_WALLTIME) {
306 elapsed = time_now() - ti->len.t.timer_start;
307 if (elapsed < TREE_BUSYWAIT_INTERVAL) return false;
310 /* Break early if we estimate the second-best move cannot
311 * catch up in assigned time anymore. We use all our time
312 * if we are in byoyomi with single stone remaining in our
313 * period, however - it's better to pre-ponder. */
314 bool time_indulgent = (!ti->len.t.main_time && ti->len.t.byoyomi_stones == 1);
315 if (best2 && ti->dim == TD_WALLTIME
316 && played >= PLAYOUT_EARLY_BREAK_MIN && !time_indulgent) {
317 double remaining = stop->worst.time - elapsed;
318 double pps = ((double)played) / elapsed;
319 double estplayouts = remaining * pps + PLAYOUT_DELTA_SAFEMARGIN;
320 if (best->u.playouts > best2->u.playouts + estplayouts) {
321 if (UDEBUGL(2))
322 fprintf(stderr, "Early stop, result cannot change: "
323 "best %d, best2 %d, estimated %f simulations to go (%d/%f=%f pps)\n",
324 best->u.playouts, best2->u.playouts, estplayouts, played, elapsed, pps);
325 return true;
329 /* Early break in won situation. */
330 if (best->u.playouts >= PLAYOUT_EARLY_BREAK_MIN
331 && (ti->dim != TD_WALLTIME || elapsed > TIME_EARLY_BREAK_MIN)
332 && tree_node_get_value(t, 1, best->u.value) >= u->sure_win_threshold) {
333 return true;
336 return false;
339 /* Determine whether we should terminate the search later than expected. */
340 static bool
341 uct_search_keep_looking(struct uct *u, struct tree *t, struct board *b,
342 struct time_info *ti, struct time_stop *stop,
343 struct tree_node *best, struct tree_node *best2,
344 struct tree_node *bestr, struct tree_node *winner, int i)
346 if (!best) {
347 if (UDEBUGL(2))
348 fprintf(stderr, "Did not find best move, still trying...\n");
349 return true;
352 /* Do not waste time if we are winning. Spend up to worst time if
353 * we are unsure, but only desired time if we are sure of winning. */
354 floating_t beta = 2 * (tree_node_get_value(t, 1, best->u.value) - 0.5);
355 if (ti->dim == TD_WALLTIME && beta > 0) {
356 double good_enough = stop->desired.time * beta + stop->worst.time * (1 - beta);
357 double elapsed = time_now() - ti->len.t.timer_start;
358 if (elapsed > good_enough) return false;
361 if (u->best2_ratio > 0) {
362 /* Check best/best2 simulations ratio. If the
363 * two best moves give very similar results,
364 * keep simulating. */
365 if (best2 && best2->u.playouts
366 && (double)best->u.playouts / best2->u.playouts < u->best2_ratio) {
367 if (UDEBUGL(2))
368 fprintf(stderr, "Best2 ratio %f < threshold %f\n",
369 (double)best->u.playouts / best2->u.playouts,
370 u->best2_ratio);
371 return true;
375 if (u->bestr_ratio > 0) {
376 /* Check best, best_best value difference. If the best move
377 * and its best child do not give similar enough results,
378 * keep simulating. */
379 if (bestr && bestr->u.playouts
380 && fabs((double)best->u.value - bestr->u.value) > u->bestr_ratio) {
381 if (UDEBUGL(2))
382 fprintf(stderr, "Bestr delta %f > threshold %f\n",
383 fabs((double)best->u.value - bestr->u.value),
384 u->bestr_ratio);
385 return true;
389 if (winner && winner != best) {
390 /* Keep simulating if best explored
391 * does not have also highest value. */
392 if (UDEBUGL(2))
393 fprintf(stderr, "[%d] best %3s [%d] %f != winner %3s [%d] %f\n", i,
394 coord2sstr(node_coord(best), t->board),
395 best->u.playouts, tree_node_get_value(t, 1, best->u.value),
396 coord2sstr(node_coord(winner), t->board),
397 winner->u.playouts, tree_node_get_value(t, 1, winner->u.value));
398 return true;
401 /* No reason to keep simulating, bye. */
402 return false;
405 bool
406 uct_search_check_stop(struct uct *u, struct board *b, enum stone color,
407 struct tree *t, struct time_info *ti,
408 struct uct_search_state *s, int i)
410 struct uct_thread_ctx *ctx = s->ctx;
412 /* Never consider stopping if we played too few simulations.
413 * Maybe we risk losing on time when playing in super-extreme
414 * time pressure but the tree is going to be just too messed
415 * up otherwise - we might even play invalid suicides or pass
416 * when we mustn't. */
417 assert(!(ti->dim == TD_GAMES && ti->len.games < GJ_MINGAMES));
418 if (i < GJ_MINGAMES)
419 return false;
421 struct tree_node *best = NULL;
422 struct tree_node *best2 = NULL; // Second-best move.
423 struct tree_node *bestr = NULL; // best's best child.
424 struct tree_node *winner = NULL;
426 best = u->policy->choose(u->policy, ctx->t->root, b, color, resign);
427 if (best) best2 = u->policy->choose(u->policy, ctx->t->root, b, color, node_coord(best));
429 /* Possibly stop search early if it's no use to try on. */
430 int played = u->played_all + i - s->base_playouts;
431 if (best && uct_search_stop_early(u, ctx->t, b, ti, &s->stop, best, best2, played, s->fullmem))
432 return true;
434 /* Check against time settings. */
435 bool desired_done;
436 if (ti->dim == TD_WALLTIME) {
437 double elapsed = time_now() - ti->len.t.timer_start;
438 if (elapsed > s->stop.worst.time) return true;
439 desired_done = elapsed > s->stop.desired.time;
441 } else { assert(ti->dim == TD_GAMES);
442 if (i > s->stop.worst.playouts) return true;
443 desired_done = i > s->stop.desired.playouts;
446 /* We want to stop simulating, but are willing to keep trying
447 * if we aren't completely sure about the winner yet. */
448 if (desired_done) {
449 if (u->policy->winner && u->policy->evaluate) {
450 struct uct_descent descent = { .node = ctx->t->root };
451 u->policy->winner(u->policy, ctx->t, &descent);
452 winner = descent.node;
454 if (best)
455 bestr = u->policy->choose(u->policy, best, b, stone_other(color), resign);
456 if (!uct_search_keep_looking(u, ctx->t, b, ti, &s->stop, best, best2, bestr, winner, i))
457 return true;
460 /* TODO: Early break if best->variance goes under threshold
461 * and we already have enough playouts (possibly thanks to tbook
462 * or to pondering)? */
463 return false;
467 struct tree_node *
468 uct_search_result(struct uct *u, struct board *b, enum stone color,
469 bool pass_all_alive, int played_games, int base_playouts,
470 coord_t *best_coord)
472 /* Choose the best move from the tree. */
473 struct tree_node *best = u->policy->choose(u->policy, u->t->root, b, color, resign);
474 if (!best) {
475 *best_coord = pass;
476 return NULL;
478 *best_coord = node_coord(best);
479 if (UDEBUGL(1))
480 fprintf(stderr, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d/%d games), extra komi %f\n",
481 coord2sstr(node_coord(best), b), coord_x(node_coord(best), b), coord_y(node_coord(best), b),
482 tree_node_get_value(u->t, 1, best->u.value), best->u.playouts,
483 u->t->root->u.playouts, u->t->root->u.playouts - base_playouts, played_games,
484 u->t->extra_komi);
486 /* Do not resign if we're so short of time that evaluation of best
487 * move is completely unreliable, we might be winning actually.
488 * In this case best is almost random but still better than resign.
489 * Also do not resign if we are getting bad results while actually
490 * giving away extra komi points (dynkomi). */
491 if (tree_node_get_value(u->t, 1, best->u.value) < u->resign_threshold
492 && !is_pass(node_coord(best)) && best->u.playouts > GJ_MINGAMES
493 && (!u->t->use_extra_komi || komi_by_color(u->t->extra_komi, color) < 0.5)) {
494 *best_coord = resign;
495 return NULL;
498 /* If the opponent just passed and we win counting, always
499 * pass as well. For option stones_only, we pass only when there
500 * there is nothing else to do, to show how to maximize score. */
501 if (b->moves > 1 && is_pass(b->last_move.coord) && b->rules != RULES_STONES_ONLY) {
502 if (uct_pass_is_safe(u, b, color, pass_all_alive)) {
503 if (UDEBUGL(0))
504 fprintf(stderr, "<Will rather pass, looks safe enough; score %f>\n",
505 board_official_score(b, NULL) / 2);
506 *best_coord = pass;
507 best = u->t->root->children; // pass is the first child
508 assert(is_pass(node_coord(best)));
509 return best;
510 } else {
511 if (UDEBUGL(3))
512 fprintf(stderr, "Refusing to pass, unsafe; pass_all_alive %d, ownermap #playouts %d, raw score %f\n",
513 pass_all_alive, u->ownermap.playouts,
514 board_official_score(b, NULL) / 2);
518 return best;