17 #include "uct/dynkomi.h"
18 #include "uct/internal.h"
19 #include "uct/search.h"
25 uct_progress_status(struct uct
*u
, struct tree
*t
, enum stone color
, int playouts
)
31 struct tree_node
*best
= u
->policy
->choose(u
->policy
, t
->root
, t
->board
, color
, resign
);
33 fprintf(stderr
, "... No moves left\n");
36 fprintf(stderr
, "[%d] ", playouts
);
37 fprintf(stderr
, "best %f ", tree_node_get_value(t
, 1, best
->u
.value
));
40 if (t
->use_extra_komi
)
41 fprintf(stderr
, "komi %.1f ", t
->extra_komi
);
44 fprintf(stderr
, "| seq ");
45 for (int depth
= 0; depth
< 4; depth
++) {
46 if (best
&& best
->u
.playouts
>= 25) {
47 fprintf(stderr
, "%3s ", coord2sstr(best
->coord
, t
->board
));
48 best
= u
->policy
->choose(u
->policy
, best
, t
->board
, color
, resign
);
55 fprintf(stderr
, "| can ");
57 struct tree_node
*can
[cans
];
58 memset(can
, 0, sizeof(can
));
59 best
= t
->root
->children
;
62 while ((!can
[c
] || best
->u
.playouts
> can
[c
]->u
.playouts
) && ++c
< cans
);
63 for (int d
= 0; d
< c
; d
++) can
[d
] = can
[d
+ 1];
64 if (c
> 0) can
[c
- 1] = best
;
69 fprintf(stderr
, "%3s(%.3f) ",
70 coord2sstr(can
[cans
]->coord
, t
->board
),
71 tree_node_get_value(t
, 1, can
[cans
]->u
.value
));
77 fprintf(stderr
, "\n");
82 record_amaf_move(struct playout_amafmap
*amaf
, coord_t coord
, enum stone color
)
84 if (amaf
->map
[coord
] == S_NONE
|| amaf
->map
[coord
] == color
) {
85 amaf
->map
[coord
] = color
;
86 } else { // XXX: Respect amaf->record_nakade
87 amaf_op(amaf
->map
[coord
], +);
89 amaf
->game
[amaf
->gamelen
].coord
= coord
;
90 amaf
->game
[amaf
->gamelen
].color
= color
;
92 assert(amaf
->gamelen
< sizeof(amaf
->game
) / sizeof(amaf
->game
[0]));
96 struct uct_playout_callback
{
99 struct tree_node
*lnode
;
101 coord_t
*treepool
[2];
107 uct_playout_hook(struct playout_policy
*playout
, struct playout_setup
*setup
, struct board
*b
, enum stone color
, int mode
)
109 struct uct_playout_callback
*upc
= setup
->hook_data
;
110 struct uct
*u
= upc
->uct
;
113 fprintf(stderr
, "treepool check [%d] %d, %p,%p\n", mode
, u
->treepool_chance
[mode
], upc
->treepool
[0], upc
->treepool
[1]);
115 if (u
->treepool_chance
[mode
] > fast_random(100) && upc
->treepool
[color
- 1]) {
116 assert(upc
->treepool_n
[color
- 1] > 0);
118 fprintf(stderr
, "Treepool: ");
119 for (int i
= 0; i
< upc
->treepool_n
[color
- 1]; i
++)
120 fprintf(stderr
, "%s ", coord2sstr(upc
->treepool
[color
- 1][i
], b
));
121 fprintf(stderr
, "\n");
124 coord_t treepool_move
= pass
;
125 if (u
->treepool_pickfactor
) {
126 /* With pickfactor=10, we get uniform distribution. */
127 int prob
= 1000 * u
->treepool_pickfactor
/ (upc
->treepool_n
[color
- 1] * 10);
128 for (int i
= 0; i
< upc
->treepool_n
[color
- 1]; i
++) {
129 treepool_move
= upc
->treepool
[color
- 1][i
];
130 if (prob
> fast_random(1000)) break;
133 treepool_move
= upc
->treepool
[color
- 1][fast_random(upc
->treepool_n
[color
- 1])];
136 fprintf(stderr
, "Treepool pick <%d> %s,%s\n",
137 upc
->treepool_n
[color
- 1],
138 stone2str(color
), coord2sstr(treepool_move
, b
));
140 if (board_is_valid_play(b
, color
, treepool_move
))
141 return treepool_move
;
147 uct_playout_prepolicy(struct playout_policy
*playout
, struct playout_setup
*setup
, struct board
*b
, enum stone color
)
149 return uct_playout_hook(playout
, setup
, b
, color
, 0);
153 uct_playout_postpolicy(struct playout_policy
*playout
, struct playout_setup
*setup
, struct board
*b
, enum stone color
)
155 return uct_playout_hook(playout
, setup
, b
, color
, 1);
159 treepool_node_value(struct uct
*u
, struct tree
*tree
, int parity
, struct tree_node
*node
)
161 /* XXX: Playouts get cast to double */
162 switch (u
->treepool_type
) {
163 case UTT_RAVE_PLAYOUTS
:
164 return node
->amaf
.playouts
;
166 return tree_node_get_value(tree
, parity
, node
->amaf
.value
);
167 case UTT_UCT_PLAYOUTS
:
168 return node
->u
.playouts
;
170 return tree_node_get_value(tree
, parity
, node
->u
.value
);
173 struct uct_descent d
= { .node
= node
};
174 assert(u
->policy
->evaluate
);
175 return u
->policy
->evaluate(u
->policy
, tree
, &d
, parity
);
183 treepool_setup(struct uct_playout_callback
*upc
, struct board
*b
, struct tree_node
*node
, int color
)
185 struct uct
*u
= upc
->uct
;
186 int parity
= ((node
->depth
^ upc
->tree
->root
->depth
) & 1) ? -1 : 1;
188 /* XXX: Naive O(N^2) way. */
189 for (int i
= 0; i
< u
->treepool_size
; i
++) {
190 /* For each item, find the highest
191 * node not in the pool yet. */
192 struct tree_node
*best
= NULL
;
193 double best_val
= -1;
195 assert(node
->children
&& is_pass(node
->children
->coord
));
196 for (struct tree_node
*ni
= node
->children
->sibling
; ni
; ni
= ni
->sibling
) {
197 /* Do we already have it? */
199 for (int j
= 0; j
< upc
->treepool_n
[color
]; j
++) {
200 if (upc
->treepool
[color
][j
] == ni
->coord
) {
208 double i_val
= treepool_node_value(u
, upc
->tree
, parity
, ni
);
209 if (i_val
> best_val
) {
216 upc
->treepool
[color
][upc
->treepool_n
[color
]++] = best
->coord
;
222 uct_leaf_node(struct uct
*u
, struct board
*b
, enum stone player_color
,
223 struct playout_amafmap
*amaf
, struct uct_descent
*descent
,
224 struct tree_node
*significant
[2],
225 struct tree
*t
, struct tree_node
*n
, enum stone node_color
,
228 enum stone next_color
= stone_other(node_color
);
229 int parity
= (next_color
== player_color
? 1 : -1);
231 /* We need to make sure only one thread expands the node. If
232 * we are unlucky enough for two threads to meet in the same
233 * node, the latter one will simply do another simulation from
234 * the node itself, no big deal. t->nodes_size may exceed
235 * the maximum in multi-threaded case but not by much so it's ok.
236 * The size test must be before the test&set not after, to allow
237 * expansion of the node later if enough nodes have been freed. */
238 if (n
->u
.playouts
>= u
->expand_p
&& t
->nodes_size
< u
->max_tree_size
239 && !__sync_lock_test_and_set(&n
->is_expanded
, 1)) {
240 tree_expand_node(t
, n
, b
, next_color
, u
, parity
);
243 fprintf(stderr
, "%s*-- UCT playout #%d start [%s] %f\n",
244 spaces
, n
->u
.playouts
, coord2sstr(n
->coord
, t
->board
),
245 tree_node_get_value(t
, parity
, n
->u
.value
));
247 struct uct_playout_callback upc
= {
250 /* TODO: Don't necessarily restart the sequence walk when
251 * entering playout. */
255 coord_t pool
[2][u
->treepool_size
];
256 if (u
->treepool_chance
[0] + u
->treepool_chance
[1] > 0) {
257 for (int color
= 0; color
< 2; color
++) {
258 /* Prepare tree-based pool of moves to try forcing
259 * during the playout. */
260 /* We consider the children of the last significant
261 * node, picking top N choices. */
262 struct tree_node
*n
= significant
[color
];
263 if (!n
|| !n
->children
|| !n
->children
->sibling
) {
264 /* No significant node, or it's childless or has
265 * only pass as its child. */
266 upc
.treepool
[color
] = NULL
;
267 upc
.treepool_n
[color
] = 0;
269 upc
.treepool
[color
] = (coord_t
*) &pool
[color
];
270 treepool_setup(&upc
, b
, n
, color
);
275 struct playout_setup ps
= {
276 .gamelen
= u
->gamelen
,
277 .mercymin
= u
->mercymin
,
278 .prepolicy_hook
= uct_playout_prepolicy
,
279 .postpolicy_hook
= uct_playout_postpolicy
,
282 int result
= play_random_game(&ps
, b
, next_color
,
283 u
->playout_amaf
? amaf
: NULL
,
284 &u
->ownermap
, u
->playout
);
285 if (next_color
== S_WHITE
) {
286 /* We need the result from black's perspective. */
290 fprintf(stderr
, "%s -- [%d..%d] %s random playout result %d\n",
291 spaces
, player_color
, next_color
, coord2sstr(n
->coord
, t
->board
), result
);
297 scale_value(struct uct
*u
, struct board
*b
, int result
)
299 floating_t rval
= result
> 0;
301 int vp
= u
->val_points
;
303 vp
= board_size(b
) - 1; vp
*= vp
; vp
*= 2;
306 floating_t sval
= (floating_t
) abs(result
) / vp
;
307 sval
= sval
> 1 ? 1 : sval
;
308 if (result
< 0) sval
= 1 - sval
;
310 rval
+= u
->val_scale
* sval
;
312 rval
= (1 - u
->val_scale
) * rval
+ u
->val_scale
* sval
;
313 // fprintf(stderr, "score %d => sval %f, rval %f\n", result, sval, rval);
319 record_local_sequence(struct uct
*u
, struct tree
*t
,
320 struct uct_descent
*descent
, int dlen
, int di
,
321 enum stone seq_color
, floating_t rval
, int pval
)
323 /* Ignore pass sequences. */
324 if (is_pass(descent
[di
].node
->coord
))
327 /* Transform the rval appropriately, based on the expected
328 * result at the root of the sequence. */
329 if (u
->local_tree_rootseqval
) {
330 float expval
= descent
[di
- 1].value
.value
;
331 rval
= stats_temper_value(rval
, expval
, u
->local_tree
);
334 #define LTREE_DEBUG if (UDEBUGL(6))
335 LTREE_DEBUG
fprintf(stderr
, "recording result %f in local %s sequence: ",
336 rval
, stone2str(seq_color
));
339 /* Pick the right local tree root... */
340 struct tree_node
*lnode
= seq_color
== S_BLACK
? t
->ltree_black
: t
->ltree_white
;
343 /* ...and record the sequence. */
344 while (di
< dlen
&& (di
== di0
|| descent
[di
].node
->d
< u
->tenuki_d
)) {
345 LTREE_DEBUG
fprintf(stderr
, "%s[%d] ",
346 coord2sstr(descent
[di
].node
->coord
, t
->board
),
347 descent
[di
].node
->d
);
348 lnode
= tree_get_node(t
, lnode
, descent
[di
++].node
->coord
, true);
350 stats_add_result(&lnode
->u
, rval
, pval
);
353 /* Add lnode for tenuki (pass) if we descended further. */
355 LTREE_DEBUG
fprintf(stderr
, "pass ");
356 lnode
= tree_get_node(t
, lnode
, pass
, true);
358 stats_add_result(&lnode
->u
, rval
, pval
);
361 LTREE_DEBUG
fprintf(stderr
, "\n");
366 uct_playout(struct uct
*u
, struct board
*b
, enum stone player_color
, struct tree
*t
)
371 struct playout_amafmap
*amaf
= NULL
;
372 if (u
->policy
->wants_amaf
) {
373 amaf
= calloc2(1, sizeof(*amaf
));
374 amaf
->map
= calloc2(board_size2(&b2
) + 1, sizeof(*amaf
->map
));
375 amaf
->map
++; // -1 is pass
378 /* Walk the tree until we find a leaf, then expand it and do
379 * a random playout. */
380 struct tree_node
*n
= t
->root
;
381 enum stone node_color
= stone_other(player_color
);
382 assert(node_color
== t
->root_color
);
384 /* Tree descent history. */
385 /* XXX: This is somewhat messy since @n and descent[dlen-1].node are
388 struct uct_descent descent
[DLEN
];
389 descent
[0].node
= n
; descent
[0].lnode
= NULL
;
391 /* Total value of the sequence. */
392 struct move_stats seq_value
= { .playouts
= 0 };
393 /* The last "significant" node along the descent (i.e. node
394 * with higher than configured number of playouts). For black
396 struct tree_node
*significant
[2] = { NULL
, NULL
};
397 if (n
->u
.playouts
>= u
->significant_threshold
)
398 significant
[node_color
- 1] = n
;
401 int pass_limit
= (board_size(&b2
) - 2) * (board_size(&b2
) - 2) / 2;
402 int passes
= is_pass(b
->last_move
.coord
) && b
->moves
> 0;
406 static char spaces
[] = "\0 ";
409 fprintf(stderr
, "--- UCT walk with color %d\n", player_color
);
411 while (!tree_leaf_node(n
) && passes
< 2) {
412 spaces
[depth
++] = ' '; spaces
[depth
] = 0;
415 /*** Choose a node to descend to: */
417 /* Parity is chosen already according to the child color, since
418 * it is applied to children. */
419 node_color
= stone_other(node_color
);
420 int parity
= (node_color
== player_color
? 1 : -1);
423 descent
[dlen
] = descent
[dlen
- 1];
424 if (u
->local_tree
&& (!descent
[dlen
].lnode
|| descent
[dlen
].node
->d
>= u
->tenuki_d
)) {
425 /* Start new local sequence. */
426 /* Remember that node_color already holds color of the
427 * to-be-found child. */
428 descent
[dlen
].lnode
= node_color
== S_BLACK
? t
->ltree_black
: t
->ltree_white
;
431 if (!u
->random_policy_chance
|| fast_random(u
->random_policy_chance
))
432 u
->policy
->descend(u
->policy
, t
, &descent
[dlen
], parity
, b2
.moves
> pass_limit
);
434 u
->random_policy
->descend(u
->random_policy
, t
, &descent
[dlen
], parity
, b2
.moves
> pass_limit
);
437 /*** Perform the descent: */
439 if (descent
[dlen
].node
->u
.playouts
>= u
->significant_threshold
) {
440 significant
[node_color
- 1] = descent
[dlen
].node
;
443 seq_value
.playouts
+= descent
[dlen
].value
.playouts
;
444 seq_value
.value
+= descent
[dlen
].value
.value
* descent
[dlen
].value
.playouts
;
445 n
= descent
[dlen
++].node
;
446 assert(n
== t
->root
|| n
->parent
);
448 fprintf(stderr
, "%s+-- UCT sent us to [%s:%d] %d,%f\n",
449 spaces
, coord2sstr(n
->coord
, t
->board
),
450 n
->coord
, n
->u
.playouts
,
451 tree_node_get_value(t
, parity
, n
->u
.value
));
453 /* Add virtual loss if we need to; this is used to discourage
454 * other threads from visiting this node in case of multiple
455 * threads doing the tree search. */
457 stats_add_result(&n
->u
, node_color
== S_BLACK
? 0.0 : 1.0, u
->virtual_loss
);
459 assert(n
->coord
>= -1);
460 if (amaf
&& !is_pass(n
->coord
))
461 record_amaf_move(amaf
, n
->coord
, node_color
);
463 struct move m
= { n
->coord
, node_color
};
464 int res
= board_play(&b2
, &m
);
466 if (res
< 0 || (!is_pass(m
.coord
) && !group_at(&b2
, m
.coord
)) /* suicide */
467 || b2
.superko_violation
) {
469 for (struct tree_node
*ni
= n
; ni
; ni
= ni
->parent
)
470 fprintf(stderr
, "%s<%"PRIhash
"> ", coord2sstr(ni
->coord
, t
->board
), ni
->hash
);
471 fprintf(stderr
, "marking invalid %s node %d,%d res %d group %d spk %d\n",
472 stone2str(node_color
), coord_x(n
->coord
,b
), coord_y(n
->coord
,b
),
473 res
, group_at(&b2
, m
.coord
), b2
.superko_violation
);
475 n
->hints
|= TREE_HINT_INVALID
;
480 if (is_pass(n
->coord
))
487 amaf
->game_baselen
= amaf
->gamelen
;
488 amaf
->record_nakade
= u
->playout_amaf_nakade
;
491 if (t
->use_extra_komi
&& u
->dynkomi
->persim
) {
492 b2
.komi
+= round(u
->dynkomi
->persim(u
->dynkomi
, &b2
, t
, n
));
496 /* XXX: No dead groups support. */
497 floating_t score
= board_official_score(&b2
, NULL
);
498 /* Result from black's perspective (no matter who
499 * the player; black's perspective is always
500 * what the tree stores. */
501 result
= - (score
* 2);
504 fprintf(stderr
, "[%d..%d] %s p-p scoring playout result %d (W %f)\n",
505 player_color
, node_color
, coord2sstr(n
->coord
, t
->board
), result
, score
);
507 board_print(&b2
, stderr
);
509 board_ownermap_fill(&u
->ownermap
, &b2
);
511 } else { // assert(tree_leaf_node(n));
512 /* In case of parallel tree search, the assertion might
513 * not hold if two threads chew on the same node. */
514 result
= uct_leaf_node(u
, &b2
, player_color
, amaf
, &descent
[dlen
- 1], significant
, t
, n
, node_color
, spaces
);
517 if (amaf
&& u
->playout_amaf_cutoff
) {
518 unsigned int cutoff
= amaf
->game_baselen
;
519 cutoff
+= (amaf
->gamelen
- amaf
->game_baselen
) * u
->playout_amaf_cutoff
/ 100;
520 /* Now, reconstruct the amaf map. */
521 memset(amaf
->map
, 0, board_size2(&b2
) * sizeof(*amaf
->map
));
522 for (unsigned int i
= 0; i
< cutoff
; i
++) {
523 coord_t coord
= amaf
->game
[i
].coord
;
524 enum stone color
= amaf
->game
[i
].color
;
525 if (amaf
->map
[coord
] == S_NONE
|| amaf
->map
[coord
] == color
) {
526 amaf
->map
[coord
] = color
;
527 /* Nakade always recorded for in-tree part */
528 } else if (amaf
->record_nakade
|| i
<= amaf
->game_baselen
) {
529 amaf_op(amaf
->map
[n
->coord
], +);
534 assert(n
== t
->root
|| n
->parent
);
536 floating_t rval
= scale_value(u
, b
, result
);
537 u
->policy
->update(u
->policy
, t
, n
, node_color
, player_color
, amaf
, rval
);
539 int pval
= LTREE_PLAYOUTS_MULTIPLIER
;
540 if (u
->local_tree_depth_decay
> 0)
541 pval
= ((floating_t
) pval
) / pow(u
->local_tree_depth_decay
, depth
);
543 if (t
->use_extra_komi
) {
544 stats_add_result(&u
->dynkomi
->score
, result
/ 2, 1);
545 stats_add_result(&u
->dynkomi
->value
, rval
, 1);
548 if (u
->local_tree
&& n
->parent
&& !is_pass(n
->coord
) && dlen
> 0) {
549 /* Possibly transform the rval appropriately. */
550 if (!u
->local_tree_rootseqval
) {
551 floating_t expval
= seq_value
.value
/ seq_value
.playouts
;
552 rval
= stats_temper_value(rval
, expval
, u
->local_tree
);
555 /* Get the local sequences and record them in ltree. */
556 /* We will look for sequence starts in our descent
557 * history, then run record_local_sequence() for each
558 * found sequence start; record_local_sequence() may
559 * pick longer sequences from descent history then,
560 * which is expected as it will create new lnodes. */
561 enum stone seq_color
= player_color
;
562 /* First move always starts a sequence. */
563 record_local_sequence(u
, t
, descent
, dlen
, 1, seq_color
, rval
, pval
);
564 seq_color
= stone_other(seq_color
);
565 for (int dseqi
= 2; dseqi
< dlen
; dseqi
++, seq_color
= stone_other(seq_color
)) {
566 if (u
->local_tree_allseq
) {
567 /* We are configured to record all subsequences. */
568 record_local_sequence(u
, t
, descent
, dlen
, dseqi
, seq_color
, rval
, pval
);
571 if (descent
[dseqi
].node
->d
>= u
->tenuki_d
) {
572 /* Tenuki! Record the fresh sequence. */
573 record_local_sequence(u
, t
, descent
, dlen
, dseqi
, seq_color
, rval
, pval
);
576 if (descent
[dseqi
].lnode
&& !descent
[dseqi
].lnode
) {
577 /* Record result for in-descent picked sequence. */
578 record_local_sequence(u
, t
, descent
, dlen
, dseqi
, seq_color
, rval
, pval
);
586 /* We need to undo the virtual loss we added during descend. */
587 if (u
->virtual_loss
) {
588 floating_t loss
= node_color
== S_BLACK
? 0.0 : 1.0;
589 for (; n
->parent
; n
= n
->parent
) {
590 stats_rm_result(&n
->u
, loss
, u
->virtual_loss
);
599 board_done_noalloc(&b2
);
604 uct_playouts(struct uct
*u
, struct board
*b
, enum stone color
, struct tree
*t
, struct time_info
*ti
)
607 if (ti
&& ti
->dim
== TD_GAMES
) {
608 for (i
= 0; t
->root
->u
.playouts
<= ti
->len
.games
; i
++)
609 uct_playout(u
, b
, color
, t
);
611 for (i
= 0; !uct_halt
; i
++)
612 uct_playout(u
, b
, color
, t
);