17 #include "uct/dynkomi.h"
18 #include "uct/internal.h"
19 #include "uct/search.h"
24 #define DESCENT_DLEN 512
28 uct_progress_text(struct uct
*u
, struct tree
*t
, enum stone color
, int playouts
, bool final
)
34 struct tree_node
*best
= u
->policy
->choose(u
->policy
, t
->root
, t
->board
, color
, resign
);
36 fprintf(stderr
, "... No moves left\n");
39 fprintf(stderr
, "[%d] ", playouts
);
40 fprintf(stderr
, "best %f ", tree_node_get_value(t
, 1, best
->u
.value
));
43 if (t
->use_extra_komi
)
44 fprintf(stderr
, "komi %.1f ", t
->extra_komi
);
47 fprintf(stderr
, "| seq ");
48 for (int depth
= 0; depth
< 4; depth
++) {
49 if (best
&& best
->u
.playouts
>= 25) {
50 fprintf(stderr
, "%3s ", coord2sstr(best
->coord
, t
->board
));
51 best
= u
->policy
->choose(u
->policy
, best
, t
->board
, color
, resign
);
58 fprintf(stderr
, "| can ");
60 struct tree_node
*can
[cans
];
61 memset(can
, 0, sizeof(can
));
62 best
= t
->root
->children
;
65 while ((!can
[c
] || best
->u
.playouts
> can
[c
]->u
.playouts
) && ++c
< cans
);
66 for (int d
= 0; d
< c
; d
++) can
[d
] = can
[d
+ 1];
67 if (c
> 0) can
[c
- 1] = best
;
72 fprintf(stderr
, "%3s(%.3f) ",
73 coord2sstr(can
[cans
]->coord
, t
->board
),
74 tree_node_get_value(t
, 1, can
[cans
]->u
.value
));
80 fprintf(stderr
, "\n");
84 uct_progress_json(struct uct
*u
, struct tree
*t
, enum stone color
, int playouts
, bool final
, bool big
)
86 /* Prefix indicating JSON line. */
87 fprintf(stderr
, "/**/ {\"%s\": {", final
? "final" : "interm");
90 fprintf(stderr
, "\"playouts\": %d", playouts
);
93 if (t
->use_extra_komi
)
94 fprintf(stderr
, ", \"extrakomi\": %.1f", t
->extra_komi
);
96 struct tree_node
*best
= u
->policy
->choose(u
->policy
, t
->root
, t
->board
, color
, resign
);
99 fprintf(stderr
, ", \"best\": {\"%s\": %f}",
100 coord2sstr(best
->coord
, t
->board
),
101 tree_node_get_value(t
, 1, best
->u
.value
));
104 fprintf(stderr
, ", \"seq\": [");
105 for (int depth
= 0; depth
< 4; depth
++) {
106 if (!best
|| best
->u
.playouts
< 25) break;
107 fprintf(stderr
, "%s\"%s\"", depth
> 0 ? "," : "",
108 coord2sstr(best
->coord
, t
->board
));
109 best
= u
->policy
->choose(u
->policy
, best
, t
->board
, color
, resign
);
111 fprintf(stderr
, "]");
114 /* Best candidates */
116 struct tree_node
*can
[cans
];
117 memset(can
, 0, sizeof(can
));
118 best
= t
->root
->children
;
121 while ((!can
[c
] || best
->u
.playouts
> can
[c
]->u
.playouts
) && ++c
< cans
);
122 for (int d
= 0; d
< c
; d
++) can
[d
] = can
[d
+ 1];
123 if (c
> 0) can
[c
- 1] = best
;
124 best
= best
->sibling
;
126 fprintf(stderr
, ", \"can\": [");
127 while (--cans
>= 0) {
128 if (!can
[cans
]) break;
129 fprintf(stderr
, "%s{\"%s\":%.3f}",
131 coord2sstr(can
[cans
]->coord
, t
->board
),
132 tree_node_get_value(t
, 1, can
[cans
]->u
.value
));
134 fprintf(stderr
, "]");
137 /* Ownership statistics. {"b":val,"w":val,"d":val}
138 * where each val is in range [0,1] describes likelihood
139 * of this point becoming black, white and dame.
140 * If dame rate would be 0, only black rate is sent and
141 * white rate can be computed as 1-blackrate. */
142 fprintf(stderr
, ", \"ownage\": [");
144 foreach_point(t
->board
) {
145 if (board_at(t
->board
, c
) != S_NONE
) continue;
146 fprintf(stderr
, "%s{\"%s\":{", f
++ > 0 ? "," : "",
147 coord2sstr(c
, t
->board
));
148 floating_t drate
= (floating_t
) u
->ownermap
.map
[c
][S_NONE
] / u
->ownermap
.playouts
;
150 #define print_rate(color,letter) \
151 if (drate >= 0.001 || color == S_BLACK) { \
152 floating_t rate = (floating_t) u->ownermap.map[c][color] / u->ownermap.playouts; \
153 fprintf(stderr, "%s\"%c\":%.3f", p ? "," : "", letter, rate); \
156 print_rate(S_BLACK
, 'b');
157 print_rate(S_WHITE
, 'w');
158 print_rate(S_NONE
, 'd');
159 fprintf(stderr
, "}}");
161 fprintf(stderr
, "]");
163 /* Chain status statistics. Each chain is identified
164 * by some stone within, and bound to a value in range
165 * [0,1] describing likelihood of survival. */
166 fprintf(stderr
, ", \"chainstatus\": [");
168 foreach_point(t
->board
) {
169 group_t g
= group_at(t
->board
, c
);
170 if (!g
|| groupnext_at(t
->board
, c
)) continue;
171 /* Last stone in some group. */
172 fprintf(stderr
, "%s{\"%s\":%.3f}",
174 coord2sstr(c
, t
->board
),
175 (floating_t
) u
->ownermap
.map
[c
][board_at(t
->board
, c
)] / u
->ownermap
.playouts
);
177 fprintf(stderr
, "]");
180 fprintf(stderr
, "}}\n");
184 uct_progress_status(struct uct
*u
, struct tree
*t
, enum stone color
, int playouts
, bool final
)
186 switch (u
->reporting
) {
188 uct_progress_text(u
, t
, color
, playouts
, final
);
192 uct_progress_json(u
, t
, color
, playouts
, final
,
193 u
->reporting
== UR_JSON_BIG
);
201 record_amaf_move(struct playout_amafmap
*amaf
, coord_t coord
, enum stone color
)
203 if (amaf
->map
[coord
] == S_NONE
|| amaf
->map
[coord
] == color
) {
204 amaf
->map
[coord
] = color
;
205 } else { // XXX: Respect amaf->record_nakade
206 amaf_op(amaf
->map
[coord
], +);
208 amaf
->game
[amaf
->gamelen
].coord
= coord
;
209 amaf
->game
[amaf
->gamelen
].color
= color
;
211 assert(amaf
->gamelen
< sizeof(amaf
->game
) / sizeof(amaf
->game
[0]));
215 struct uct_playout_callback
{
218 struct tree_node
*lnode
;
223 uct_playout_hook(struct playout_policy
*playout
, struct playout_setup
*setup
, struct board
*b
, enum stone color
, int mode
)
225 /* XXX: This is used in some non-master branches. */
230 uct_playout_prepolicy(struct playout_policy
*playout
, struct playout_setup
*setup
, struct board
*b
, enum stone color
)
232 return uct_playout_hook(playout
, setup
, b
, color
, 0);
236 uct_playout_postpolicy(struct playout_policy
*playout
, struct playout_setup
*setup
, struct board
*b
, enum stone color
)
238 return uct_playout_hook(playout
, setup
, b
, color
, 1);
243 uct_leaf_node(struct uct
*u
, struct board
*b
, enum stone player_color
,
244 struct playout_amafmap
*amaf
,
245 struct uct_descent
*descent
, int *dlen
,
246 struct tree_node
*significant
[2],
247 struct tree
*t
, struct tree_node
*n
, enum stone node_color
,
250 enum stone next_color
= stone_other(node_color
);
251 int parity
= (next_color
== player_color
? 1 : -1);
253 /* We need to make sure only one thread expands the node. If
254 * we are unlucky enough for two threads to meet in the same
255 * node, the latter one will simply do another simulation from
256 * the node itself, no big deal. t->nodes_size may exceed
257 * the maximum in multi-threaded case but not by much so it's ok.
258 * The size test must be before the test&set not after, to allow
259 * expansion of the node later if enough nodes have been freed. */
260 if (n
->u
.playouts
>= u
->expand_p
&& t
->nodes_size
< u
->max_tree_size
261 && !__sync_lock_test_and_set(&n
->is_expanded
, 1)) {
262 tree_expand_node(t
, n
, b
, next_color
, u
, parity
);
265 fprintf(stderr
, "%s*-- UCT playout #%d start [%s] %f\n",
266 spaces
, n
->u
.playouts
, coord2sstr(n
->coord
, t
->board
),
267 tree_node_get_value(t
, parity
, n
->u
.value
));
269 struct uct_playout_callback upc
= {
272 /* TODO: Don't necessarily restart the sequence walk when
273 * entering playout. */
277 struct playout_setup ps
= {
278 .gamelen
= u
->gamelen
,
279 .mercymin
= u
->mercymin
,
280 .prepolicy_hook
= uct_playout_prepolicy
,
281 .postpolicy_hook
= uct_playout_postpolicy
,
284 int result
= play_random_game(&ps
, b
, next_color
,
285 u
->playout_amaf
? amaf
: NULL
,
286 &u
->ownermap
, u
->playout
);
287 if (next_color
== S_WHITE
) {
288 /* We need the result from black's perspective. */
292 fprintf(stderr
, "%s -- [%d..%d] %s random playout result %d\n",
293 spaces
, player_color
, next_color
, coord2sstr(n
->coord
, t
->board
), result
);
299 scale_value(struct uct
*u
, struct board
*b
, int result
)
301 floating_t rval
= result
> 0 ? 1.0 : result
< 0 ? 0.0 : 0.5;
302 if (u
->val_scale
&& result
!= 0) {
303 int vp
= u
->val_points
;
305 vp
= board_size(b
) - 1; vp
*= vp
; vp
*= 2;
308 floating_t sval
= (floating_t
) abs(result
) / vp
;
309 sval
= sval
> 1 ? 1 : sval
;
310 if (result
< 0) sval
= 1 - sval
;
312 rval
+= u
->val_scale
* sval
;
314 rval
= (1 - u
->val_scale
) * rval
+ u
->val_scale
* sval
;
315 // fprintf(stderr, "score %d => sval %f, rval %f\n", result, sval, rval);
321 record_local_sequence(struct uct
*u
, struct tree
*t
,
322 struct uct_descent
*descent
, int dlen
, int di
,
323 enum stone seq_color
, floating_t rval
)
325 #define LTREE_DEBUG if (UDEBUGL(6))
327 /* Ignore pass sequences. */
328 if (is_pass(descent
[di
].node
->coord
))
331 /* Transform the rval appropriately, based on the expected
332 * result at the root of the sequence. */
333 if (u
->local_tree_rootseqval
) {
334 float expval
= descent
[di
- 1].value
.value
;
335 rval
= stats_temper_value(rval
, expval
, u
->local_tree
);
338 LTREE_DEBUG
fprintf(stderr
, "recording result %f in local %s sequence: ",
339 rval
, stone2str(seq_color
));
341 /* Sequences starting deeper are less relevant in general. */
342 int pval
= LTREE_PLAYOUTS_MULTIPLIER
;
343 if (u
->local_tree
&& u
->local_tree_depth_decay
> 0)
344 pval
= ((floating_t
) pval
) / pow(u
->local_tree_depth_decay
, di
- 1);
346 LTREE_DEBUG
fprintf(stderr
, "too deep @%d\n", di
);
350 /* Pick the right local tree root... */
351 struct tree_node
*lnode
= seq_color
== S_BLACK
? t
->ltree_black
: t
->ltree_white
;
354 /* ...and record the sequence. */
356 while (di
< dlen
&& (di
== di0
|| descent
[di
].node
->d
< u
->tenuki_d
)) {
357 LTREE_DEBUG
fprintf(stderr
, "%s[%d] ",
358 coord2sstr(descent
[di
].node
->coord
, t
->board
),
359 descent
[di
].node
->d
);
360 lnode
= tree_get_node(t
, lnode
, descent
[di
++].node
->coord
, true);
362 stats_add_result(&lnode
->u
, rval
, pval
);
365 /* Add lnode for tenuki (pass) if we descended further. */
367 LTREE_DEBUG
fprintf(stderr
, "pass ");
368 lnode
= tree_get_node(t
, lnode
, pass
, true);
370 stats_add_result(&lnode
->u
, rval
, pval
);
373 LTREE_DEBUG
fprintf(stderr
, "\n");
378 uct_playout(struct uct
*u
, struct board
*b
, enum stone player_color
, struct tree
*t
)
383 struct playout_amafmap
*amaf
= NULL
;
384 if (u
->policy
->wants_amaf
) {
385 amaf
= calloc2(1, sizeof(*amaf
));
386 amaf
->map
= calloc2(board_size2(&b2
) + 1, sizeof(*amaf
->map
));
387 amaf
->map
++; // -1 is pass
390 /* Walk the tree until we find a leaf, then expand it and do
391 * a random playout. */
392 struct tree_node
*n
= t
->root
;
393 enum stone node_color
= stone_other(player_color
);
394 assert(node_color
== t
->root_color
);
396 /* Tree descent history. */
397 /* XXX: This is somewhat messy since @n and descent[dlen-1].node are
399 struct uct_descent descent
[DESCENT_DLEN
];
400 descent
[0].node
= n
; descent
[0].lnode
= NULL
;
402 /* Total value of the sequence. */
403 struct move_stats seq_value
= { .playouts
= 0 };
404 /* The last "significant" node along the descent (i.e. node
405 * with higher than configured number of playouts). For black
407 struct tree_node
*significant
[2] = { NULL
, NULL
};
408 if (n
->u
.playouts
>= u
->significant_threshold
)
409 significant
[node_color
- 1] = n
;
412 int pass_limit
= (board_size(&b2
) - 2) * (board_size(&b2
) - 2) / 2;
413 int passes
= is_pass(b
->last_move
.coord
) && b
->moves
> 0;
416 static char spaces
[] = "\0 ";
419 fprintf(stderr
, "--- UCT walk with color %d\n", player_color
);
421 while (!tree_leaf_node(n
) && passes
< 2) {
422 spaces
[dlen
- 1] = ' '; spaces
[dlen
] = 0;
425 /*** Choose a node to descend to: */
427 /* Parity is chosen already according to the child color, since
428 * it is applied to children. */
429 node_color
= stone_other(node_color
);
430 int parity
= (node_color
== player_color
? 1 : -1);
432 assert(dlen
< DESCENT_DLEN
);
433 descent
[dlen
] = descent
[dlen
- 1];
434 if (u
->local_tree
&& (!descent
[dlen
].lnode
|| descent
[dlen
].node
->d
>= u
->tenuki_d
)) {
435 /* Start new local sequence. */
436 /* Remember that node_color already holds color of the
437 * to-be-found child. */
438 descent
[dlen
].lnode
= node_color
== S_BLACK
? t
->ltree_black
: t
->ltree_white
;
441 if (!u
->random_policy_chance
|| fast_random(u
->random_policy_chance
))
442 u
->policy
->descend(u
->policy
, t
, &descent
[dlen
], parity
, b2
.moves
> pass_limit
);
444 u
->random_policy
->descend(u
->random_policy
, t
, &descent
[dlen
], parity
, b2
.moves
> pass_limit
);
447 /*** Perform the descent: */
449 if (descent
[dlen
].node
->u
.playouts
>= u
->significant_threshold
) {
450 significant
[node_color
- 1] = descent
[dlen
].node
;
453 seq_value
.playouts
+= descent
[dlen
].value
.playouts
;
454 seq_value
.value
+= descent
[dlen
].value
.value
* descent
[dlen
].value
.playouts
;
455 n
= descent
[dlen
++].node
;
456 assert(n
== t
->root
|| n
->parent
);
458 fprintf(stderr
, "%s+-- UCT sent us to [%s:%d] %d,%f\n",
459 spaces
, coord2sstr(n
->coord
, t
->board
),
460 n
->coord
, n
->u
.playouts
,
461 tree_node_get_value(t
, parity
, n
->u
.value
));
463 /* Add virtual loss if we need to; this is used to discourage
464 * other threads from visiting this node in case of multiple
465 * threads doing the tree search. */
467 stats_add_result(&n
->u
, node_color
== S_BLACK
? 0.0 : 1.0, u
->virtual_loss
);
469 assert(n
->coord
>= -1);
470 if (amaf
&& !is_pass(n
->coord
))
471 record_amaf_move(amaf
, n
->coord
, node_color
);
473 struct move m
= { n
->coord
, node_color
};
474 int res
= board_play(&b2
, &m
);
476 if (res
< 0 || (!is_pass(m
.coord
) && !group_at(&b2
, m
.coord
)) /* suicide */
477 || b2
.superko_violation
) {
479 for (struct tree_node
*ni
= n
; ni
; ni
= ni
->parent
)
480 fprintf(stderr
, "%s<%"PRIhash
"> ", coord2sstr(ni
->coord
, t
->board
), ni
->hash
);
481 fprintf(stderr
, "marking invalid %s node %d,%d res %d group %d spk %d\n",
482 stone2str(node_color
), coord_x(n
->coord
,b
), coord_y(n
->coord
,b
),
483 res
, group_at(&b2
, m
.coord
), b2
.superko_violation
);
485 n
->hints
|= TREE_HINT_INVALID
;
490 if (is_pass(n
->coord
))
497 amaf
->game_baselen
= amaf
->gamelen
;
498 amaf
->record_nakade
= u
->playout_amaf_nakade
;
501 if (t
->use_extra_komi
&& u
->dynkomi
->persim
) {
502 b2
.komi
+= round(u
->dynkomi
->persim(u
->dynkomi
, &b2
, t
, n
));
506 /* XXX: No dead groups support. */
507 floating_t score
= board_official_score(&b2
, NULL
);
508 /* Result from black's perspective (no matter who
509 * the player; black's perspective is always
510 * what the tree stores. */
511 result
= - (score
* 2);
514 fprintf(stderr
, "[%d..%d] %s p-p scoring playout result %d (W %f)\n",
515 player_color
, node_color
, coord2sstr(n
->coord
, t
->board
), result
, score
);
517 board_print(&b2
, stderr
);
519 board_ownermap_fill(&u
->ownermap
, &b2
);
521 } else { // assert(tree_leaf_node(n));
522 /* In case of parallel tree search, the assertion might
523 * not hold if two threads chew on the same node. */
524 result
= uct_leaf_node(u
, &b2
, player_color
, amaf
, descent
, &dlen
, significant
, t
, n
, node_color
, spaces
);
527 if (amaf
&& u
->playout_amaf_cutoff
) {
528 unsigned int cutoff
= amaf
->game_baselen
;
529 cutoff
+= (amaf
->gamelen
- amaf
->game_baselen
) * u
->playout_amaf_cutoff
/ 100;
530 /* Now, reconstruct the amaf map. */
531 memset(amaf
->map
, 0, board_size2(&b2
) * sizeof(*amaf
->map
));
532 for (unsigned int i
= 0; i
< cutoff
; i
++) {
533 coord_t coord
= amaf
->game
[i
].coord
;
534 enum stone color
= amaf
->game
[i
].color
;
535 if (amaf
->map
[coord
] == S_NONE
|| amaf
->map
[coord
] == color
) {
536 amaf
->map
[coord
] = color
;
537 /* Nakade always recorded for in-tree part */
538 } else if (amaf
->record_nakade
|| i
<= amaf
->game_baselen
) {
539 amaf_op(amaf
->map
[n
->coord
], +);
544 /* Record the result. */
546 assert(n
== t
->root
|| n
->parent
);
547 floating_t rval
= scale_value(u
, b
, result
);
548 u
->policy
->update(u
->policy
, t
, n
, node_color
, player_color
, amaf
, &b2
, rval
);
550 if (t
->use_extra_komi
) {
551 stats_add_result(&u
->dynkomi
->score
, result
/ 2, 1);
552 stats_add_result(&u
->dynkomi
->value
, rval
, 1);
555 if (u
->local_tree
&& n
->parent
&& !is_pass(n
->coord
) && dlen
> 0) {
556 /* Possibly transform the rval appropriately. */
557 if (!u
->local_tree_rootseqval
) {
558 floating_t expval
= seq_value
.value
/ seq_value
.playouts
;
559 rval
= stats_temper_value(rval
, expval
, u
->local_tree
);
562 /* Get the local sequences and record them in ltree. */
563 /* We will look for sequence starts in our descent
564 * history, then run record_local_sequence() for each
565 * found sequence start; record_local_sequence() may
566 * pick longer sequences from descent history then,
567 * which is expected as it will create new lnodes. */
568 enum stone seq_color
= player_color
;
569 /* First move always starts a sequence. */
570 record_local_sequence(u
, t
, descent
, dlen
, 1, seq_color
, rval
);
571 seq_color
= stone_other(seq_color
);
572 for (int dseqi
= 2; dseqi
< dlen
; dseqi
++, seq_color
= stone_other(seq_color
)) {
573 if (u
->local_tree_allseq
) {
574 /* We are configured to record all subsequences. */
575 record_local_sequence(u
, t
, descent
, dlen
, dseqi
, seq_color
, rval
);
578 if (descent
[dseqi
].node
->d
>= u
->tenuki_d
) {
579 /* Tenuki! Record the fresh sequence. */
580 record_local_sequence(u
, t
, descent
, dlen
, dseqi
, seq_color
, rval
);
583 if (descent
[dseqi
].lnode
&& !descent
[dseqi
].lnode
) {
584 /* Record result for in-descent picked sequence. */
585 record_local_sequence(u
, t
, descent
, dlen
, dseqi
, seq_color
, rval
);
592 /* We need to undo the virtual loss we added during descend. */
593 if (u
->virtual_loss
) {
594 floating_t loss
= node_color
== S_BLACK
? 0.0 : 1.0;
595 for (; n
->parent
; n
= n
->parent
) {
596 stats_rm_result(&n
->u
, loss
, u
->virtual_loss
);
605 board_done_noalloc(&b2
);
610 uct_playouts(struct uct
*u
, struct board
*b
, enum stone color
, struct tree
*t
, struct time_info
*ti
)
613 if (ti
&& ti
->dim
== TD_GAMES
) {
614 for (i
= 0; t
->root
->u
.playouts
<= ti
->len
.games
; i
++)
615 uct_playout(u
, b
, color
, t
);
617 for (i
= 0; !uct_halt
; i
++)
618 uct_playout(u
, b
, color
, t
);