uct_search_state.print_fullmem -> fullmem
[pachi/derm.git] / uct / search.c
blob1c9447467c0adea44421e45e9e3e41eb110fb4b1
1 #include <assert.h>
2 #include <math.h>
3 #include <pthread.h>
4 #include <signal.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <time.h>
10 #define DEBUG
12 #include "debug.h"
13 #include "distributed/distributed.h"
14 #include "move.h"
15 #include "random.h"
16 #include "timeinfo.h"
17 #include "uct/dynkomi.h"
18 #include "uct/internal.h"
19 #include "uct/search.h"
20 #include "uct/tree.h"
21 #include "uct/uct.h"
22 #include "uct/walk.h"
25 /* Default number of simulations to perform per move.
26 * Note that this is now in total over all threads! (Unless TM_ROOT.) */
27 #define MC_GAMES 80000
28 static const struct time_info default_ti = {
29 .period = TT_MOVE,
30 .dim = TD_GAMES,
31 .len = { .games = MC_GAMES },
34 /* Once per how many simulations (per thread) to show a progress report line. */
35 #define TREE_SIMPROGRESS_INTERVAL 10000
37 /* When terminating UCT search early, the safety margin to add to the
38 * remaining playout number estimate when deciding whether the result can
39 * still change. */
40 #define PLAYOUT_DELTA_SAFEMARGIN 1000
42 /* Minimal number of simulations to consider early break. */
43 #define PLAYOUT_EARLY_BREAK_MIN 2500
46 /* Pachi threading structure:
48 * main thread
49 * | main(), GTP communication, ...
50 * | starts and stops the search managed by thread_manager
51 * |
52 * thread_manager
53 * | spawns and collects worker threads
54 * |
55 * worker0
56 * worker1
57 * ...
58 * workerK
59 * uct_playouts() loop, doing descend-playout until uct_halt
61 * Another way to look at it is by functions (lines denote thread boundaries):
63 * | uct_genmove()
64 * | uct_search() (uct_search_start() .. uct_search_stop())
65 * | -----------------------
66 * | spawn_thread_manager()
67 * | -----------------------
68 * | spawn_worker()
69 * V uct_playouts() */
71 /* Set in thread manager in case the workers should stop. */
72 volatile sig_atomic_t uct_halt = 0;
73 /* ID of the running worker thread. */
74 __thread int thread_id = -1;
75 /* ID of the thread manager. */
76 static pthread_t thread_manager;
77 bool thread_manager_running;
79 static pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER;
80 static pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER;
81 static volatile int finish_thread;
82 static pthread_mutex_t finish_serializer = PTHREAD_MUTEX_INITIALIZER;
84 static void *
85 spawn_worker(void *ctx_)
87 struct uct_thread_ctx *ctx = ctx_;
88 /* Setup */
89 fast_srandom(ctx->seed);
90 thread_id = ctx->tid;
91 /* Run */
92 ctx->games = uct_playouts(ctx->u, ctx->b, ctx->color, ctx->t);
93 /* Finish */
94 pthread_mutex_lock(&finish_serializer);
95 pthread_mutex_lock(&finish_mutex);
96 finish_thread = ctx->tid;
97 pthread_cond_signal(&finish_cond);
98 pthread_mutex_unlock(&finish_mutex);
99 return ctx;
102 /* Thread manager, controlling worker threads. It must be called with
103 * finish_mutex lock held, but it will unlock it itself before exiting;
104 * this is necessary to be completely deadlock-free. */
105 /* The finish_cond can be signalled for it to stop; in that case,
106 * the caller should set finish_thread = -1. */
107 /* After it is started, it will update mctx->t to point at some tree
108 * used for the actual search (matters only for TM_ROOT), on return
109 * it will set mctx->games to the number of performed simulations. */
110 static void *
111 spawn_thread_manager(void *ctx_)
113 /* In thread_manager, we use only some of the ctx fields. */
114 struct uct_thread_ctx *mctx = ctx_;
115 struct uct *u = mctx->u;
116 struct tree *t = mctx->t;
117 bool shared_tree = u->parallel_tree;
118 fast_srandom(mctx->seed);
120 int played_games = 0;
121 pthread_t threads[u->threads];
122 int joined = 0;
124 uct_halt = 0;
126 /* Garbage collect the tree by preference when pondering. */
127 if (u->pondering && t->nodes && t->nodes_size > t->max_tree_size/2) {
128 unsigned long temp_size = (MIN_FREE_MEM_PERCENT * t->max_tree_size) / 100;
129 t->root = tree_garbage_collect(t, temp_size, t->root);
132 /* Spawn threads... */
133 for (int ti = 0; ti < u->threads; ti++) {
134 struct uct_thread_ctx *ctx = malloc2(sizeof(*ctx));
135 ctx->u = u; ctx->b = mctx->b; ctx->color = mctx->color;
136 mctx->t = ctx->t = shared_tree ? t : tree_copy(t);
137 ctx->tid = ti; ctx->seed = fast_random(65536) + ti;
138 pthread_create(&threads[ti], NULL, spawn_worker, ctx);
139 if (UDEBUGL(3))
140 fprintf(stderr, "Spawned worker %d\n", ti);
143 /* ...and collect them back: */
144 while (joined < u->threads) {
145 /* Wait for some thread to finish... */
146 pthread_cond_wait(&finish_cond, &finish_mutex);
147 if (finish_thread < 0) {
148 /* Stop-by-caller. Tell the workers to wrap up. */
149 uct_halt = 1;
150 continue;
152 /* ...and gather its remnants. */
153 struct uct_thread_ctx *ctx;
154 pthread_join(threads[finish_thread], (void **) &ctx);
155 played_games += ctx->games;
156 joined++;
157 if (!shared_tree) {
158 if (ctx->t == mctx->t) mctx->t = t;
159 tree_merge(t, ctx->t);
160 tree_done(ctx->t);
162 free(ctx);
163 if (UDEBUGL(3))
164 fprintf(stderr, "Joined worker %d\n", finish_thread);
165 pthread_mutex_unlock(&finish_serializer);
168 pthread_mutex_unlock(&finish_mutex);
170 if (!shared_tree)
171 tree_normalize(mctx->t, u->threads);
173 mctx->games = played_games;
174 return mctx;
178 /*** THREAD MANAGER end */
180 /*** Search infrastructure: */
184 uct_search_games(struct uct_search_state *s)
186 return s->ctx->t->root->u.playouts;
189 void
190 uct_search_start(struct uct *u, struct board *b, enum stone color,
191 struct tree *t, struct time_info *ti,
192 struct uct_search_state *s)
194 /* Set up search state. */
195 s->base_playouts = s->last_dynkomi = s->last_print = t->root->u.playouts;
196 s->print_interval = TREE_SIMPROGRESS_INTERVAL * (u->thread_model == TM_ROOT ? 1 : u->threads);
197 s->fullmem = false;
199 if (ti) {
200 if (ti->period == TT_NULL) *ti = default_ti;
201 time_stop_conditions(ti, b, u->fuseki_end, u->yose_start, &s->stop);
204 /* Fire up the tree search thread manager, which will in turn
205 * spawn the searching threads. */
206 assert(u->threads > 0);
207 assert(!thread_manager_running);
208 static struct uct_thread_ctx mctx;
209 mctx = (struct uct_thread_ctx) { .u = u, .b = b, .color = color, .t = t, .seed = fast_random(65536) };
210 s->ctx = &mctx;
211 pthread_mutex_lock(&finish_mutex);
212 pthread_create(&thread_manager, NULL, spawn_thread_manager, s->ctx);
213 thread_manager_running = true;
216 struct uct_thread_ctx *
217 uct_search_stop(void)
219 assert(thread_manager_running);
221 /* Signal thread manager to stop the workers. */
222 pthread_mutex_lock(&finish_mutex);
223 finish_thread = -1;
224 pthread_cond_signal(&finish_cond);
225 pthread_mutex_unlock(&finish_mutex);
227 /* Collect the thread manager. */
228 struct uct_thread_ctx *pctx;
229 thread_manager_running = false;
230 pthread_join(thread_manager, (void **) &pctx);
231 return pctx;
235 void
236 uct_search_progress(struct uct *u, struct board *b, enum stone color,
237 struct tree *t, struct time_info *ti,
238 struct uct_search_state *s, int i)
240 struct uct_thread_ctx *ctx = s->ctx;
242 /* Adjust dynkomi? */
243 if (ctx->t->use_extra_komi && u->dynkomi->permove
244 && !u->pondering && u->dynkomi_interval
245 && i > s->last_dynkomi + u->dynkomi_interval) {
246 s->last_dynkomi += u->dynkomi_interval;
247 float old_dynkomi = ctx->t->extra_komi;
248 ctx->t->extra_komi = u->dynkomi->permove(u->dynkomi, b, ctx->t);
249 if (UDEBUGL(3) && old_dynkomi != ctx->t->extra_komi)
250 fprintf(stderr, "dynkomi adjusted (%f -> %f)\n",
251 old_dynkomi, ctx->t->extra_komi);
254 /* Print progress? */
255 if (i - s->last_print > s->print_interval) {
256 s->last_print += s->print_interval; // keep the numbers tidy
257 uct_progress_status(u, ctx->t, color, s->last_print);
260 if (!s->fullmem && ctx->t->nodes_size > u->max_tree_size) {
261 if (UDEBUGL(2))
262 fprintf(stderr, "memory limit hit (%lu > %lu)\n",
263 ctx->t->nodes_size, u->max_tree_size);
264 s->fullmem = true;
269 /* Determine whether we should terminate the search early. */
270 static bool
271 uct_search_stop_early(struct uct *u, struct tree *t, struct board *b,
272 struct time_info *ti, struct time_stop *stop,
273 struct tree_node *best, struct tree_node *best2,
274 int played)
276 /* Break early if we estimate the second-best move cannot
277 * catch up in assigned time anymore. We use all our time
278 * if we are in byoyomi with single stone remaining in our
279 * period, however - it's better to pre-ponder. */
280 bool time_indulgent = (!ti->len.t.main_time && ti->len.t.byoyomi_stones == 1);
281 if (best2 && ti->dim == TD_WALLTIME && !time_indulgent) {
282 double elapsed = time_now() - ti->len.t.timer_start;
283 double remaining = stop->worst.time - elapsed;
284 double pps = ((double)played) / elapsed;
285 double estplayouts = remaining * pps + PLAYOUT_DELTA_SAFEMARGIN;
286 if (best->u.playouts > best2->u.playouts + estplayouts) {
287 if (UDEBUGL(2))
288 fprintf(stderr, "Early stop, result cannot change: "
289 "best %d, best2 %d, estimated %f simulations to go\n",
290 best->u.playouts, best2->u.playouts, estplayouts);
291 return true;
295 /* Early break in won situation. */
296 if (best->u.playouts >= PLAYOUT_EARLY_BREAK_MIN
297 && tree_node_get_value(t, 1, best->u.value) >= u->loss_threshold) {
298 /* Still use at least half the desired time (top-bounded by
299 * a reasonably confident number of simulations). It is silly
300 * to lose a won game because we played a bad move in 0.1s. */
301 double elapsed = 0;
302 if (ti->dim == TD_WALLTIME) {
303 elapsed = time_now() - ti->len.t.timer_start;
304 return elapsed >= 0.5 * stop->desired.time;
305 } else {
306 return true;
310 return false;
313 /* Determine whether we should terminate the search later than expected. */
314 static bool
315 uct_search_keep_looking(struct uct *u, struct tree *t, struct board *b,
316 struct time_info *ti, struct time_stop *stop,
317 struct tree_node *best, struct tree_node *best2,
318 struct tree_node *bestr, struct tree_node *winner, int i)
320 if (!best) {
321 if (UDEBUGL(2))
322 fprintf(stderr, "Did not find best move, still trying...\n");
323 return true;
326 /* Do not waste time if we are winning. Spend up to worst time if
327 * we are unsure, but only desired time if we are sure of winning. */
328 float beta = 2 * (tree_node_get_value(t, 1, best->u.value) - 0.5);
329 if (ti->dim == TD_WALLTIME && beta > 0) {
330 double good_enough = stop->desired.time * beta + stop->worst.time * (1 - beta);
331 double elapsed = time_now() - ti->len.t.timer_start;
332 if (elapsed > good_enough) return false;
335 if (u->best2_ratio > 0) {
336 /* Check best/best2 simulations ratio. If the
337 * two best moves give very similar results,
338 * keep simulating. */
339 if (best2 && best2->u.playouts
340 && (double)best->u.playouts / best2->u.playouts < u->best2_ratio) {
341 if (UDEBUGL(2))
342 fprintf(stderr, "Best2 ratio %f < threshold %f\n",
343 (double)best->u.playouts / best2->u.playouts,
344 u->best2_ratio);
345 return true;
349 if (u->bestr_ratio > 0) {
350 /* Check best, best_best value difference. If the best move
351 * and its best child do not give similar enough results,
352 * keep simulating. */
353 if (bestr && bestr->u.playouts
354 && fabs((double)best->u.value - bestr->u.value) > u->bestr_ratio) {
355 if (UDEBUGL(2))
356 fprintf(stderr, "Bestr delta %f > threshold %f\n",
357 fabs((double)best->u.value - bestr->u.value),
358 u->bestr_ratio);
359 return true;
363 if (winner && winner != best) {
364 /* Keep simulating if best explored
365 * does not have also highest value. */
366 if (UDEBUGL(2))
367 fprintf(stderr, "[%d] best %3s [%d] %f != winner %3s [%d] %f\n", i,
368 coord2sstr(best->coord, t->board),
369 best->u.playouts, tree_node_get_value(t, 1, best->u.value),
370 coord2sstr(winner->coord, t->board),
371 winner->u.playouts, tree_node_get_value(t, 1, winner->u.value));
372 return true;
375 /* No reason to keep simulating, bye. */
376 return false;
379 bool
380 uct_search_check_stop(struct uct *u, struct board *b, enum stone color,
381 struct tree *t, struct time_info *ti,
382 struct uct_search_state *s, int i)
384 struct uct_thread_ctx *ctx = s->ctx;
386 /* Never consider stopping if we played too few simulations.
387 * Maybe we risk losing on time when playing in super-extreme
388 * time pressure but the tree is going to be just too messed
389 * up otherwise - we might even play invalid suicides or pass
390 * when we mustn't. */
391 if (i < GJ_MINGAMES)
392 return false;
394 struct tree_node *best = NULL;
395 struct tree_node *best2 = NULL; // Second-best move.
396 struct tree_node *bestr = NULL; // best's best child.
397 struct tree_node *winner = NULL;
399 best = u->policy->choose(u->policy, ctx->t->root, b, color, resign);
400 if (best) best2 = u->policy->choose(u->policy, ctx->t->root, b, color, best->coord);
402 /* Possibly stop search early if it's no use to try on. */
403 int played = u->played_all + i - s->base_playouts;
404 if (best && uct_search_stop_early(u, ctx->t, b, ti, &s->stop, best, best2, played))
405 return true;
407 /* Check against time settings. */
408 bool desired_done;
409 if (ti->dim == TD_WALLTIME) {
410 double elapsed = time_now() - ti->len.t.timer_start;
411 if (elapsed > s->stop.worst.time) return true;
412 desired_done = elapsed > s->stop.desired.time;
414 } else { assert(ti->dim == TD_GAMES);
415 if (i > s->stop.worst.playouts) return true;
416 desired_done = i > s->stop.desired.playouts;
419 /* We want to stop simulating, but are willing to keep trying
420 * if we aren't completely sure about the winner yet. */
421 if (desired_done) {
422 if (u->policy->winner && u->policy->evaluate) {
423 struct uct_descent descent = { .node = ctx->t->root };
424 u->policy->winner(u->policy, ctx->t, &descent);
425 winner = descent.node;
427 if (best)
428 bestr = u->policy->choose(u->policy, best, b, stone_other(color), resign);
429 if (!uct_search_keep_looking(u, ctx->t, b, ti, &s->stop, best, best2, bestr, winner, i))
430 return true;
433 /* TODO: Early break if best->variance goes under threshold
434 * and we already have enough playouts (possibly thanks to book
435 * or to pondering)? */
436 return false;
440 struct tree_node *
441 uct_search_result(struct uct *u, struct board *b, enum stone color,
442 bool pass_all_alive, int played_games, int base_playouts,
443 coord_t *best_coord)
445 /* Choose the best move from the tree. */
446 struct tree_node *best = u->policy->choose(u->policy, u->t->root, b, color, resign);
447 if (!best) {
448 *best_coord = pass;
449 return NULL;
451 *best_coord = best->coord;
452 if (UDEBUGL(1))
453 fprintf(stderr, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d/%d games), extra komi %f\n",
454 coord2sstr(best->coord, b), coord_x(best->coord, b), coord_y(best->coord, b),
455 tree_node_get_value(u->t, 1, best->u.value), best->u.playouts,
456 u->t->root->u.playouts, u->t->root->u.playouts - base_playouts, played_games,
457 u->t->extra_komi);
459 /* Do not resign if we're so short of time that evaluation of best
460 * move is completely unreliable, we might be winning actually.
461 * In this case best is almost random but still better than resign.
462 * Also do not resign if we are getting bad results while actually
463 * giving away extra komi points (dynkomi). */
464 if (tree_node_get_value(u->t, 1, best->u.value) < u->resign_ratio
465 && !is_pass(best->coord) && best->u.playouts > GJ_MINGAMES
466 && komi_by_color(u->t->extra_komi, color) < 0.5) {
467 *best_coord = resign;
468 return NULL;
471 /* If the opponent just passed and we win counting, always
472 * pass as well. */
473 if (b->moves > 1 && is_pass(b->last_move.coord)) {
474 /* Make sure enough playouts are simulated. */
475 while (u->ownermap.playouts < GJ_MINGAMES)
476 uct_playout(u, b, color, u->t);
477 if (uct_pass_is_safe(u, b, color, u->pass_all_alive || pass_all_alive)) {
478 if (UDEBUGL(0))
479 fprintf(stderr, "<Will rather pass, looks safe enough; score %f>\n",
480 board_official_score(b, NULL) / 2);
481 *best_coord = pass;
482 return NULL;
486 return best;