14 #include "distributed/distributed.h"
18 #include "uct/dynkomi.h"
19 #include "uct/internal.h"
20 #include "uct/search.h"
26 /* Default time settings for the UCT engine. In distributed mode, slaves are
27 * unlimited by default and all control is done on the master, either in time
28 * or with total number of playouts over all slaves. (It is also possible but
29 * not recommended to limit only the slaves; the master then decides the move
30 * when a majority of slaves have made their choice.) */
31 static struct time_info default_ti
;
32 static __attribute__((constructor
)) void
35 time_parse(&default_ti
, "15");
38 static const struct time_info unlimited_ti
= {
41 .len
= { .games
= INT_MAX
},
44 /* When terminating UCT search early, the safety margin to add to the
45 * remaining playout number estimate when deciding whether the result can
47 #define PLAYOUT_DELTA_SAFEMARGIN 1000
49 /* Minimal number of simulations to consider early break. */
50 #define PLAYOUT_EARLY_BREAK_MIN 5000
52 /* Minimal time to consider early break (in seconds). */
53 #define TIME_EARLY_BREAK_MIN 1.0
56 /* Pachi threading structure:
59 * | main(), GTP communication, ...
60 * | starts and stops the search managed by thread_manager
63 * | spawns and collects worker threads
69 * uct_playouts() loop, doing descend-playout until uct_halt
71 * Another way to look at it is by functions (lines denote thread boundaries):
74 * | uct_search() (uct_search_start() .. uct_search_stop())
75 * | -----------------------
76 * | spawn_thread_manager()
77 * | -----------------------
81 /* Set in thread manager in case the workers should stop. */
82 volatile sig_atomic_t uct_halt
= 0;
83 /* ID of the thread manager. */
84 static pthread_t thread_manager
;
85 bool thread_manager_running
;
87 static pthread_mutex_t finish_mutex
= PTHREAD_MUTEX_INITIALIZER
;
88 static pthread_cond_t finish_cond
= PTHREAD_COND_INITIALIZER
;
89 static volatile int finish_thread
;
90 static pthread_mutex_t finish_serializer
= PTHREAD_MUTEX_INITIALIZER
;
93 spawn_worker(void *ctx_
)
95 struct uct_thread_ctx
*ctx
= ctx_
;
97 fast_srandom(ctx
->seed
);
99 ctx
->games
= uct_playouts(ctx
->u
, ctx
->b
, ctx
->color
, ctx
->t
, ctx
->ti
);
101 pthread_mutex_lock(&finish_serializer
);
102 pthread_mutex_lock(&finish_mutex
);
103 finish_thread
= ctx
->tid
;
104 pthread_cond_signal(&finish_cond
);
105 pthread_mutex_unlock(&finish_mutex
);
109 /* Thread manager, controlling worker threads. It must be called with
110 * finish_mutex lock held, but it will unlock it itself before exiting;
111 * this is necessary to be completely deadlock-free. */
112 /* The finish_cond can be signalled for it to stop; in that case,
113 * the caller should set finish_thread = -1. */
114 /* After it is started, it will update mctx->t to point at some tree
115 * used for the actual search, on return
116 * it will set mctx->games to the number of performed simulations. */
118 spawn_thread_manager(void *ctx_
)
120 /* In thread_manager, we use only some of the ctx fields. */
121 struct uct_thread_ctx
*mctx
= ctx_
;
122 struct uct
*u
= mctx
->u
;
123 struct tree
*t
= mctx
->t
;
124 fast_srandom(mctx
->seed
);
126 int played_games
= 0;
127 pthread_t threads
[u
->threads
];
132 /* Garbage collect the tree by preference when pondering. */
133 if (u
->pondering
&& t
->nodes
&& t
->nodes_size
>= t
->pruning_threshold
) {
134 t
->root
= tree_garbage_collect(t
, t
->root
);
137 /* Spawn threads... */
138 for (int ti
= 0; ti
< u
->threads
; ti
++) {
139 struct uct_thread_ctx
*ctx
= malloc2(sizeof(*ctx
));
140 ctx
->u
= u
; ctx
->b
= mctx
->b
; ctx
->color
= mctx
->color
;
141 mctx
->t
= ctx
->t
= t
;
142 ctx
->tid
= ti
; ctx
->seed
= fast_random(65536) + ti
;
145 pthread_attr_init(&a
);
146 pthread_attr_setstacksize(&a
, 1048576);
147 pthread_create(&threads
[ti
], &a
, spawn_worker
, ctx
);
149 fprintf(stderr
, "Spawned worker %d\n", ti
);
152 /* ...and collect them back: */
153 while (joined
< u
->threads
) {
154 /* Wait for some thread to finish... */
155 pthread_cond_wait(&finish_cond
, &finish_mutex
);
156 if (finish_thread
< 0) {
157 /* Stop-by-caller. Tell the workers to wrap up
158 * and unblock them from terminating. */
160 /* We need to make sure the workers do not complete
161 * the termination sequence before we get officially
162 * stopped - their wake and the stop wake could get
164 pthread_mutex_unlock(&finish_serializer
);
167 /* ...and gather its remnants. */
168 struct uct_thread_ctx
*ctx
;
169 pthread_join(threads
[finish_thread
], (void **) &ctx
);
170 played_games
+= ctx
->games
;
174 fprintf(stderr
, "Joined worker %d\n", finish_thread
);
175 pthread_mutex_unlock(&finish_serializer
);
178 pthread_mutex_unlock(&finish_mutex
);
180 mctx
->games
= played_games
;
185 /*** THREAD MANAGER end */
187 /*** Search infrastructure: */
191 uct_search_games(struct uct_search_state
*s
)
193 return s
->ctx
->t
->root
->u
.playouts
;
197 uct_search_start(struct uct
*u
, struct board
*b
, enum stone color
,
198 struct tree
*t
, struct time_info
*ti
,
199 struct uct_search_state
*s
)
201 /* Set up search state. */
202 s
->base_playouts
= s
->last_dynkomi
= s
->last_print
= t
->root
->u
.playouts
;
203 s
->print_interval
= u
->reportfreq
* u
->threads
;
207 if (ti
->period
== TT_NULL
) {
212 time_start_timer(ti
);
215 time_stop_conditions(ti
, b
, u
->fuseki_end
, u
->yose_start
, u
->max_maintime_ratio
, &s
->stop
);
218 /* Fire up the tree search thread manager, which will in turn
219 * spawn the searching threads. */
220 assert(u
->threads
> 0);
221 assert(!thread_manager_running
);
222 static struct uct_thread_ctx mctx
;
223 mctx
= (struct uct_thread_ctx
) { .u
= u
, .b
= b
, .color
= color
, .t
= t
, .seed
= fast_random(65536), .ti
= ti
};
225 pthread_mutex_lock(&finish_serializer
);
226 pthread_mutex_lock(&finish_mutex
);
227 pthread_create(&thread_manager
, NULL
, spawn_thread_manager
, s
->ctx
);
228 thread_manager_running
= true;
231 struct uct_thread_ctx
*
232 uct_search_stop(void)
234 assert(thread_manager_running
);
236 /* Signal thread manager to stop the workers. */
237 pthread_mutex_lock(&finish_mutex
);
239 pthread_cond_signal(&finish_cond
);
240 pthread_mutex_unlock(&finish_mutex
);
242 /* Collect the thread manager. */
243 struct uct_thread_ctx
*pctx
;
244 thread_manager_running
= false;
245 pthread_join(thread_manager
, (void **) &pctx
);
251 uct_search_progress(struct uct
*u
, struct board
*b
, enum stone color
,
252 struct tree
*t
, struct time_info
*ti
,
253 struct uct_search_state
*s
, int i
)
255 struct uct_thread_ctx
*ctx
= s
->ctx
;
257 /* Adjust dynkomi? */
258 int di
= u
->dynkomi_interval
* u
->threads
;
259 if (ctx
->t
->use_extra_komi
&& u
->dynkomi
->permove
260 && !u
->pondering
&& di
261 && i
> s
->last_dynkomi
+ di
) {
262 s
->last_dynkomi
+= di
;
263 floating_t old_dynkomi
= ctx
->t
->extra_komi
;
264 ctx
->t
->extra_komi
= u
->dynkomi
->permove(u
->dynkomi
, b
, ctx
->t
);
265 if (UDEBUGL(3) && old_dynkomi
!= ctx
->t
->extra_komi
)
266 fprintf(stderr
, "dynkomi adjusted (%f -> %f)\n",
267 old_dynkomi
, ctx
->t
->extra_komi
);
270 /* Print progress? */
271 if (i
- s
->last_print
> s
->print_interval
) {
272 s
->last_print
+= s
->print_interval
; // keep the numbers tidy
273 uct_progress_status(u
, ctx
->t
, color
, s
->last_print
, NULL
);
276 if (!s
->fullmem
&& ctx
->t
->nodes_size
> u
->max_tree_size
) {
278 fprintf(stderr
, "memory limit hit (%lu > %lu)\n",
279 ctx
->t
->nodes_size
, u
->max_tree_size
);
285 /* Determine whether we should terminate the search early. */
287 uct_search_stop_early(struct uct
*u
, struct tree
*t
, struct board
*b
,
288 struct time_info
*ti
, struct time_stop
*stop
,
289 struct tree_node
*best
, struct tree_node
*best2
,
290 int played
, bool fullmem
)
292 /* If the memory is full, stop immediately. Since the tree
293 * cannot grow anymore, some non-well-expanded nodes will
294 * quickly take over with extremely high ratio since the
295 * counters are not properly simulated (just as if we use
296 * non-UCT MonteCarlo). */
297 /* (XXX: A proper solution would be to prune the tree
302 /* Think at least 100ms to avoid a random move. This is particularly
303 * important in distributed mode, where this function is called frequently. */
304 double elapsed
= 0.0;
305 if (ti
->dim
== TD_WALLTIME
) {
306 elapsed
= time_now() - ti
->len
.t
.timer_start
;
307 if (elapsed
< TREE_BUSYWAIT_INTERVAL
) return false;
310 /* Break early if we estimate the second-best move cannot
311 * catch up in assigned time anymore. We use all our time
312 * if we are in byoyomi with single stone remaining in our
313 * period, however - it's better to pre-ponder. */
314 bool time_indulgent
= (!ti
->len
.t
.main_time
&& ti
->len
.t
.byoyomi_stones
== 1);
315 if (best2
&& ti
->dim
== TD_WALLTIME
316 && played
>= PLAYOUT_EARLY_BREAK_MIN
&& !time_indulgent
) {
317 double remaining
= stop
->worst
.time
- elapsed
;
318 double pps
= ((double)played
) / elapsed
;
319 double estplayouts
= remaining
* pps
+ PLAYOUT_DELTA_SAFEMARGIN
;
320 if (best
->u
.playouts
> best2
->u
.playouts
+ estplayouts
) {
322 fprintf(stderr
, "Early stop, result cannot change: "
323 "best %d, best2 %d, estimated %f simulations to go (%d/%f=%f pps)\n",
324 best
->u
.playouts
, best2
->u
.playouts
, estplayouts
, played
, elapsed
, pps
);
329 /* Early break in won situation. */
330 if (best
->u
.playouts
>= PLAYOUT_EARLY_BREAK_MIN
331 && (ti
->dim
!= TD_WALLTIME
|| elapsed
> TIME_EARLY_BREAK_MIN
)
332 && tree_node_get_value(t
, 1, best
->u
.value
) >= u
->sure_win_threshold
) {
339 /* Determine whether we should terminate the search later than expected. */
341 uct_search_keep_looking(struct uct
*u
, struct tree
*t
, struct board
*b
,
342 struct time_info
*ti
, struct time_stop
*stop
,
343 struct tree_node
*best
, struct tree_node
*best2
,
344 struct tree_node
*bestr
, struct tree_node
*winner
, int i
)
348 fprintf(stderr
, "Did not find best move, still trying...\n");
352 /* Do not waste time if we are winning. Spend up to worst time if
353 * we are unsure, but only desired time if we are sure of winning. */
354 floating_t beta
= 2 * (tree_node_get_value(t
, 1, best
->u
.value
) - 0.5);
355 if (ti
->dim
== TD_WALLTIME
&& beta
> 0) {
356 double good_enough
= stop
->desired
.time
* beta
+ stop
->worst
.time
* (1 - beta
);
357 double elapsed
= time_now() - ti
->len
.t
.timer_start
;
358 if (elapsed
> good_enough
) return false;
361 if (u
->best2_ratio
> 0) {
362 /* Check best/best2 simulations ratio. If the
363 * two best moves give very similar results,
364 * keep simulating. */
365 if (best2
&& best2
->u
.playouts
366 && (double)best
->u
.playouts
/ best2
->u
.playouts
< u
->best2_ratio
) {
368 fprintf(stderr
, "Best2 ratio %f < threshold %f\n",
369 (double)best
->u
.playouts
/ best2
->u
.playouts
,
375 if (u
->bestr_ratio
> 0) {
376 /* Check best, best_best value difference. If the best move
377 * and its best child do not give similar enough results,
378 * keep simulating. */
379 if (bestr
&& bestr
->u
.playouts
380 && fabs((double)best
->u
.value
- bestr
->u
.value
) > u
->bestr_ratio
) {
382 fprintf(stderr
, "Bestr delta %f > threshold %f\n",
383 fabs((double)best
->u
.value
- bestr
->u
.value
),
389 if (winner
&& winner
!= best
) {
390 /* Keep simulating if best explored
391 * does not have also highest value. */
393 fprintf(stderr
, "[%d] best %3s [%d] %f != winner %3s [%d] %f\n", i
,
394 coord2sstr(node_coord(best
), t
->board
),
395 best
->u
.playouts
, tree_node_get_value(t
, 1, best
->u
.value
),
396 coord2sstr(node_coord(winner
), t
->board
),
397 winner
->u
.playouts
, tree_node_get_value(t
, 1, winner
->u
.value
));
401 /* No reason to keep simulating, bye. */
406 uct_search_check_stop(struct uct
*u
, struct board
*b
, enum stone color
,
407 struct tree
*t
, struct time_info
*ti
,
408 struct uct_search_state
*s
, int i
)
410 struct uct_thread_ctx
*ctx
= s
->ctx
;
412 /* Never consider stopping if we played too few simulations.
413 * Maybe we risk losing on time when playing in super-extreme
414 * time pressure but the tree is going to be just too messed
415 * up otherwise - we might even play invalid suicides or pass
416 * when we mustn't. */
417 assert(!(ti
->dim
== TD_GAMES
&& ti
->len
.games
< GJ_MINGAMES
));
421 struct tree_node
*best
= NULL
;
422 struct tree_node
*best2
= NULL
; // Second-best move.
423 struct tree_node
*bestr
= NULL
; // best's best child.
424 struct tree_node
*winner
= NULL
;
426 best
= u
->policy
->choose(u
->policy
, ctx
->t
->root
, b
, color
, resign
);
427 if (best
) best2
= u
->policy
->choose(u
->policy
, ctx
->t
->root
, b
, color
, node_coord(best
));
429 /* Possibly stop search early if it's no use to try on. */
430 int played
= u
->played_all
+ i
- s
->base_playouts
;
431 if (best
&& uct_search_stop_early(u
, ctx
->t
, b
, ti
, &s
->stop
, best
, best2
, played
, s
->fullmem
))
434 /* Check against time settings. */
436 if (ti
->dim
== TD_WALLTIME
) {
437 double elapsed
= time_now() - ti
->len
.t
.timer_start
;
438 if (elapsed
> s
->stop
.worst
.time
) return true;
439 desired_done
= elapsed
> s
->stop
.desired
.time
;
441 } else { assert(ti
->dim
== TD_GAMES
);
442 if (i
> s
->stop
.worst
.playouts
) return true;
443 desired_done
= i
> s
->stop
.desired
.playouts
;
446 /* We want to stop simulating, but are willing to keep trying
447 * if we aren't completely sure about the winner yet. */
449 if (u
->policy
->winner
&& u
->policy
->evaluate
) {
450 struct uct_descent descent
= { .node
= ctx
->t
->root
};
451 u
->policy
->winner(u
->policy
, ctx
->t
, &descent
);
452 winner
= descent
.node
;
455 bestr
= u
->policy
->choose(u
->policy
, best
, b
, stone_other(color
), resign
);
456 if (!uct_search_keep_looking(u
, ctx
->t
, b
, ti
, &s
->stop
, best
, best2
, bestr
, winner
, i
))
460 /* TODO: Early break if best->variance goes under threshold
461 * and we already have enough playouts (possibly thanks to tbook
462 * or to pondering)? */
468 uct_search_result(struct uct
*u
, struct board
*b
, enum stone color
,
469 bool pass_all_alive
, int played_games
, int base_playouts
,
472 /* Choose the best move from the tree. */
473 struct tree_node
*best
= u
->policy
->choose(u
->policy
, u
->t
->root
, b
, color
, resign
);
478 *best_coord
= node_coord(best
);
480 fprintf(stderr
, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d/%d games), extra komi %f\n",
481 coord2sstr(node_coord(best
), b
), coord_x(node_coord(best
), b
), coord_y(node_coord(best
), b
),
482 tree_node_get_value(u
->t
, 1, best
->u
.value
), best
->u
.playouts
,
483 u
->t
->root
->u
.playouts
, u
->t
->root
->u
.playouts
- base_playouts
, played_games
,
486 /* Do not resign if we're so short of time that evaluation of best
487 * move is completely unreliable, we might be winning actually.
488 * In this case best is almost random but still better than resign.
489 * Also do not resign if we are getting bad results while actually
490 * giving away extra komi points (dynkomi). */
491 if (tree_node_get_value(u
->t
, 1, best
->u
.value
) < u
->resign_threshold
492 && !is_pass(node_coord(best
)) && best
->u
.playouts
> GJ_MINGAMES
493 && (!u
->t
->use_extra_komi
|| komi_by_color(u
->t
->extra_komi
, color
) < 0.5)) {
494 *best_coord
= resign
;
498 /* If the opponent just passed and we win counting, always
499 * pass as well. For option stones_only, we pass only when there
500 * there is nothing else to do, to show how to maximize score. */
501 if (b
->moves
> 1 && is_pass(b
->last_move
.coord
) && b
->rules
!= RULES_STONES_ONLY
) {
502 if (uct_pass_is_safe(u
, b
, color
, pass_all_alive
)) {
504 fprintf(stderr
, "<Will rather pass, looks safe enough; score %f>\n",
505 board_official_score(b
, NULL
) / 2);
507 best
= u
->t
->root
->children
; // pass is the first child
508 assert(is_pass(node_coord(best
)));
512 fprintf(stderr
, "Refusing to pass, unsafe; pass_all_alive %d, ownermap #playouts %d, raw score %f\n",
513 pass_all_alive
, u
->ownermap
.playouts
,
514 board_official_score(b
, NULL
) / 2);