13 #include "distributed/distributed.h"
17 #include "uct/dynkomi.h"
18 #include "uct/internal.h"
19 #include "uct/search.h"
25 /* Default number of simulations to perform per move.
26 * Note that this is now in total over all threads! (Unless TM_ROOT.) */
27 #define MC_GAMES 80000
28 static const struct time_info default_ti
= {
31 .len
= { .games
= MC_GAMES
},
34 /* Once per how many simulations (per thread) to show a progress report line. */
35 #define TREE_SIMPROGRESS_INTERVAL 10000
37 /* When terminating UCT search early, the safety margin to add to the
38 * remaining playout number estimate when deciding whether the result can
40 #define PLAYOUT_DELTA_SAFEMARGIN 1000
43 /* Pachi threading structure:
46 * | main(), GTP communication, ...
47 * | starts and stops the search managed by thread_manager
50 * | spawns and collects worker threads
56 * uct_playouts() loop, doing descend-playout until uct_halt
58 * Another way to look at it is by functions (lines denote thread boundaries):
61 * | uct_search() (uct_search_start() .. uct_search_stop())
62 * | -----------------------
63 * | spawn_thread_manager()
64 * | -----------------------
68 /* Set in thread manager in case the workers should stop. */
69 volatile sig_atomic_t uct_halt
= 0;
70 /* ID of the running worker thread. */
71 __thread
int thread_id
= -1;
72 /* ID of the thread manager. */
73 static pthread_t thread_manager
;
74 bool thread_manager_running
;
76 static pthread_mutex_t finish_mutex
= PTHREAD_MUTEX_INITIALIZER
;
77 static pthread_cond_t finish_cond
= PTHREAD_COND_INITIALIZER
;
78 static volatile int finish_thread
;
79 static pthread_mutex_t finish_serializer
= PTHREAD_MUTEX_INITIALIZER
;
82 spawn_worker(void *ctx_
)
84 struct uct_thread_ctx
*ctx
= ctx_
;
86 fast_srandom(ctx
->seed
);
89 ctx
->games
= uct_playouts(ctx
->u
, ctx
->b
, ctx
->color
, ctx
->t
);
91 pthread_mutex_lock(&finish_serializer
);
92 pthread_mutex_lock(&finish_mutex
);
93 finish_thread
= ctx
->tid
;
94 pthread_cond_signal(&finish_cond
);
95 pthread_mutex_unlock(&finish_mutex
);
99 /* Thread manager, controlling worker threads. It must be called with
100 * finish_mutex lock held, but it will unlock it itself before exiting;
101 * this is necessary to be completely deadlock-free. */
102 /* The finish_cond can be signalled for it to stop; in that case,
103 * the caller should set finish_thread = -1. */
104 /* After it is started, it will update mctx->t to point at some tree
105 * used for the actual search (matters only for TM_ROOT), on return
106 * it will set mctx->games to the number of performed simulations. */
108 spawn_thread_manager(void *ctx_
)
110 /* In thread_manager, we use only some of the ctx fields. */
111 struct uct_thread_ctx
*mctx
= ctx_
;
112 struct uct
*u
= mctx
->u
;
113 struct tree
*t
= mctx
->t
;
114 bool shared_tree
= u
->parallel_tree
;
115 fast_srandom(mctx
->seed
);
117 int played_games
= 0;
118 pthread_t threads
[u
->threads
];
123 /* Garbage collect the tree by preference when pondering. */
124 if (u
->pondering
&& t
->nodes
&& t
->nodes_size
> t
->max_tree_size
/2) {
125 unsigned long temp_size
= (MIN_FREE_MEM_PERCENT
* t
->max_tree_size
) / 100;
126 t
->root
= tree_garbage_collect(t
, temp_size
, t
->root
);
129 /* Spawn threads... */
130 for (int ti
= 0; ti
< u
->threads
; ti
++) {
131 struct uct_thread_ctx
*ctx
= malloc2(sizeof(*ctx
));
132 ctx
->u
= u
; ctx
->b
= mctx
->b
; ctx
->color
= mctx
->color
;
133 mctx
->t
= ctx
->t
= shared_tree
? t
: tree_copy(t
);
134 ctx
->tid
= ti
; ctx
->seed
= fast_random(65536) + ti
;
135 pthread_create(&threads
[ti
], NULL
, spawn_worker
, ctx
);
137 fprintf(stderr
, "Spawned worker %d\n", ti
);
140 /* ...and collect them back: */
141 while (joined
< u
->threads
) {
142 /* Wait for some thread to finish... */
143 pthread_cond_wait(&finish_cond
, &finish_mutex
);
144 if (finish_thread
< 0) {
145 /* Stop-by-caller. Tell the workers to wrap up. */
149 /* ...and gather its remnants. */
150 struct uct_thread_ctx
*ctx
;
151 pthread_join(threads
[finish_thread
], (void **) &ctx
);
152 played_games
+= ctx
->games
;
155 if (ctx
->t
== mctx
->t
) mctx
->t
= t
;
156 tree_merge(t
, ctx
->t
);
161 fprintf(stderr
, "Joined worker %d\n", finish_thread
);
162 pthread_mutex_unlock(&finish_serializer
);
165 pthread_mutex_unlock(&finish_mutex
);
168 tree_normalize(mctx
->t
, u
->threads
);
170 mctx
->games
= played_games
;
175 /*** THREAD MANAGER end */
177 /*** Search infrastructure: */
181 uct_search_games(struct uct_search_state
*s
)
183 return s
->ctx
->t
->root
->u
.playouts
;
187 uct_search_start(struct uct
*u
, struct board
*b
, enum stone color
,
188 struct tree
*t
, struct time_info
*ti
,
189 struct uct_search_state
*s
)
191 /* Set up search state. */
192 s
->base_playouts
= s
->last_dynkomi
= s
->last_print
= t
->root
->u
.playouts
;
193 s
->print_interval
= TREE_SIMPROGRESS_INTERVAL
* (u
->thread_model
== TM_ROOT
? 1 : u
->threads
);
194 s
->print_fullmem
= false;
197 if (ti
->period
== TT_NULL
) *ti
= default_ti
;
198 time_stop_conditions(ti
, b
, u
->fuseki_end
, u
->yose_start
, &s
->stop
);
201 /* Fire up the tree search thread manager, which will in turn
202 * spawn the searching threads. */
203 assert(u
->threads
> 0);
204 assert(!thread_manager_running
);
205 static struct uct_thread_ctx mctx
;
206 mctx
= (struct uct_thread_ctx
) { .u
= u
, .b
= b
, .color
= color
, .t
= t
, .seed
= fast_random(65536) };
208 pthread_mutex_lock(&finish_mutex
);
209 pthread_create(&thread_manager
, NULL
, spawn_thread_manager
, s
->ctx
);
210 thread_manager_running
= true;
213 struct uct_thread_ctx
*
214 uct_search_stop(void)
216 assert(thread_manager_running
);
218 /* Signal thread manager to stop the workers. */
219 pthread_mutex_lock(&finish_mutex
);
221 pthread_cond_signal(&finish_cond
);
222 pthread_mutex_unlock(&finish_mutex
);
224 /* Collect the thread manager. */
225 struct uct_thread_ctx
*pctx
;
226 thread_manager_running
= false;
227 pthread_join(thread_manager
, (void **) &pctx
);
233 uct_search_progress(struct uct
*u
, struct board
*b
, enum stone color
,
234 struct tree
*t
, struct time_info
*ti
,
235 struct uct_search_state
*s
, int i
)
237 struct uct_thread_ctx
*ctx
= s
->ctx
;
239 /* Adjust dynkomi? */
240 if (ctx
->t
->use_extra_komi
&& u
->dynkomi
->permove
241 && u
->dynkomi_interval
242 && i
> s
->last_dynkomi
+ u
->dynkomi_interval
) {
243 s
->last_dynkomi
+= u
->dynkomi_interval
;
244 float old_dynkomi
= ctx
->t
->extra_komi
;
245 ctx
->t
->extra_komi
= u
->dynkomi
->permove(u
->dynkomi
, b
, ctx
->t
);
246 if (UDEBUGL(3) && old_dynkomi
!= ctx
->t
->extra_komi
)
247 fprintf(stderr
, "dynkomi adjusted (%f -> %f)\n",
248 old_dynkomi
, ctx
->t
->extra_komi
);
251 /* Print progress? */
252 if (i
- s
->last_print
> s
->print_interval
) {
253 s
->last_print
+= s
->print_interval
; // keep the numbers tidy
254 uct_progress_status(u
, ctx
->t
, color
, s
->last_print
);
257 if (!s
->print_fullmem
&& ctx
->t
->nodes_size
> u
->max_tree_size
) {
259 fprintf(stderr
, "memory limit hit (%lu > %lu)\n",
260 ctx
->t
->nodes_size
, u
->max_tree_size
);
261 s
->print_fullmem
= true;
266 /* Determine whether we should terminate the search early. */
268 uct_search_stop_early(struct uct
*u
, struct tree
*t
, struct board
*b
,
269 struct time_info
*ti
, struct time_stop
*stop
,
270 struct tree_node
*best
, struct tree_node
*best2
,
273 /* Always use at least half the desired time. It is silly
274 * to lose a won game because we played a bad move in 0.1s. */
276 if (ti
->dim
== TD_WALLTIME
) {
277 elapsed
= time_now() - ti
->len
.t
.timer_start
;
278 if (elapsed
< 0.5 * stop
->desired
.time
) return false;
281 /* Early break in won situation. */
282 if (best
->u
.playouts
>= 2000 && tree_node_get_value(t
, 1, best
->u
.value
) >= u
->loss_threshold
)
284 /* Earlier break in super-won situation. */
285 if (best
->u
.playouts
>= 500 && tree_node_get_value(t
, 1, best
->u
.value
) >= 0.95)
288 /* Break early if we estimate the second-best move cannot
289 * catch up in assigned time anymore. We use all our time
290 * if we are in byoyomi with single stone remaining in our
291 * period, however - it's better to pre-ponder. */
292 bool time_indulgent
= (!ti
->len
.t
.main_time
&& ti
->len
.t
.byoyomi_stones
== 1);
293 if (best2
&& ti
->dim
== TD_WALLTIME
&& !time_indulgent
) {
294 double remaining
= stop
->worst
.time
- elapsed
;
295 double pps
= ((double)played
) / elapsed
;
296 double estplayouts
= remaining
* pps
+ PLAYOUT_DELTA_SAFEMARGIN
;
297 if (best
->u
.playouts
> best2
->u
.playouts
+ estplayouts
) {
299 fprintf(stderr
, "Early stop, result cannot change: "
300 "best %d, best2 %d, estimated %f simulations to go\n",
301 best
->u
.playouts
, best2
->u
.playouts
, estplayouts
);
309 /* Determine whether we should terminate the search later than expected. */
311 uct_search_keep_looking(struct uct
*u
, struct tree
*t
, struct board
*b
,
312 struct time_info
*ti
, struct time_stop
*stop
,
313 struct tree_node
*best
, struct tree_node
*best2
,
314 struct tree_node
*bestr
, struct tree_node
*winner
, int i
)
318 fprintf(stderr
, "Did not find best move, still trying...\n");
322 /* Do not waste time if we are winning. Spend up to worst time if
323 * we are unsure, but only desired time if we are sure of winning. */
324 float beta
= 2 * (tree_node_get_value(t
, 1, best
->u
.value
) - 0.5);
325 if (ti
->dim
== TD_WALLTIME
&& beta
> 0) {
326 double good_enough
= stop
->desired
.time
* beta
+ stop
->worst
.time
* (1 - beta
);
327 double elapsed
= time_now() - ti
->len
.t
.timer_start
;
328 if (elapsed
> good_enough
) return false;
331 if (u
->best2_ratio
> 0) {
332 /* Check best/best2 simulations ratio. If the
333 * two best moves give very similar results,
334 * keep simulating. */
335 if (best2
&& best2
->u
.playouts
336 && (double)best
->u
.playouts
/ best2
->u
.playouts
< u
->best2_ratio
) {
338 fprintf(stderr
, "Best2 ratio %f < threshold %f\n",
339 (double)best
->u
.playouts
/ best2
->u
.playouts
,
345 if (u
->bestr_ratio
> 0) {
346 /* Check best, best_best value difference. If the best move
347 * and its best child do not give similar enough results,
348 * keep simulating. */
349 if (bestr
&& bestr
->u
.playouts
350 && fabs((double)best
->u
.value
- bestr
->u
.value
) > u
->bestr_ratio
) {
352 fprintf(stderr
, "Bestr delta %f > threshold %f\n",
353 fabs((double)best
->u
.value
- bestr
->u
.value
),
359 if (winner
&& winner
!= best
) {
360 /* Keep simulating if best explored
361 * does not have also highest value. */
363 fprintf(stderr
, "[%d] best %3s [%d] %f != winner %3s [%d] %f\n", i
,
364 coord2sstr(best
->coord
, t
->board
),
365 best
->u
.playouts
, tree_node_get_value(t
, 1, best
->u
.value
),
366 coord2sstr(winner
->coord
, t
->board
),
367 winner
->u
.playouts
, tree_node_get_value(t
, 1, winner
->u
.value
));
371 /* No reason to keep simulating, bye. */
376 uct_search_check_stop(struct uct
*u
, struct board
*b
, enum stone color
,
377 struct tree
*t
, struct time_info
*ti
,
378 struct uct_search_state
*s
, int i
)
380 struct uct_thread_ctx
*ctx
= s
->ctx
;
382 /* Never consider stopping if we played too few simulations.
383 * Maybe we risk losing on time when playing in super-extreme
384 * time pressure but the tree is going to be just too messed
385 * up otherwise - we might even play invalid suicides or pass
386 * when we mustn't. */
390 struct tree_node
*best
= NULL
;
391 struct tree_node
*best2
= NULL
; // Second-best move.
392 struct tree_node
*bestr
= NULL
; // best's best child.
393 struct tree_node
*winner
= NULL
;
395 best
= u
->policy
->choose(u
->policy
, ctx
->t
->root
, b
, color
, resign
);
396 if (best
) best2
= u
->policy
->choose(u
->policy
, ctx
->t
->root
, b
, color
, best
->coord
);
398 /* Possibly stop search early if it's no use to try on. */
399 int played
= u
->played_all
+ i
- s
->base_playouts
;
400 if (best
&& uct_search_stop_early(u
, ctx
->t
, b
, ti
, &s
->stop
, best
, best2
, played
))
403 /* Check against time settings. */
405 if (ti
->dim
== TD_WALLTIME
) {
406 double elapsed
= time_now() - ti
->len
.t
.timer_start
;
407 if (elapsed
> s
->stop
.worst
.time
) return true;
408 desired_done
= elapsed
> s
->stop
.desired
.time
;
410 } else { assert(ti
->dim
== TD_GAMES
);
411 if (i
> s
->stop
.worst
.playouts
) return true;
412 desired_done
= i
> s
->stop
.desired
.playouts
;
415 /* We want to stop simulating, but are willing to keep trying
416 * if we aren't completely sure about the winner yet. */
418 if (u
->policy
->winner
&& u
->policy
->evaluate
) {
419 struct uct_descent descent
= { .node
= ctx
->t
->root
};
420 u
->policy
->winner(u
->policy
, ctx
->t
, &descent
);
421 winner
= descent
.node
;
424 bestr
= u
->policy
->choose(u
->policy
, best
, b
, stone_other(color
), resign
);
425 if (!uct_search_keep_looking(u
, ctx
->t
, b
, ti
, &s
->stop
, best
, best2
, bestr
, winner
, i
))
429 /* TODO: Early break if best->variance goes under threshold
430 * and we already have enough playouts (possibly thanks to book
431 * or to pondering)? */
437 uct_search_result(struct uct
*u
, struct board
*b
, enum stone color
,
438 bool pass_all_alive
, int played_games
, int base_playouts
,
441 /* Choose the best move from the tree. */
442 struct tree_node
*best
= u
->policy
->choose(u
->policy
, u
->t
->root
, b
, color
, resign
);
447 *best_coord
= best
->coord
;
449 fprintf(stderr
, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d/%d games), extra komi %f\n",
450 coord2sstr(best
->coord
, b
), coord_x(best
->coord
, b
), coord_y(best
->coord
, b
),
451 tree_node_get_value(u
->t
, 1, best
->u
.value
), best
->u
.playouts
,
452 u
->t
->root
->u
.playouts
, u
->t
->root
->u
.playouts
- base_playouts
, played_games
,
455 /* Do not resign if we're so short of time that evaluation of best
456 * move is completely unreliable, we might be winning actually.
457 * In this case best is almost random but still better than resign.
458 * Also do not resign if we are getting bad results while actually
459 * giving away extra komi points (dynkomi). */
460 if (tree_node_get_value(u
->t
, 1, best
->u
.value
) < u
->resign_ratio
461 && !is_pass(best
->coord
) && best
->u
.playouts
> GJ_MINGAMES
462 && komi_by_color(u
->t
->extra_komi
, color
) < 0.5) {
463 *best_coord
= resign
;
467 /* If the opponent just passed and we win counting, always
469 if (b
->moves
> 1 && is_pass(b
->last_move
.coord
)) {
470 /* Make sure enough playouts are simulated. */
471 while (u
->ownermap
.playouts
< GJ_MINGAMES
)
472 uct_playout(u
, b
, color
, u
->t
);
473 if (uct_pass_is_safe(u
, b
, color
, u
->pass_all_alive
|| pass_all_alive
)) {
475 fprintf(stderr
, "<Will rather pass, looks safe enough; score %f>\n",
476 board_official_score(b
, NULL
) / 2);