12 #include "playout/moggy.h"
13 #include "playout/old.h"
14 #include "playout/light.h"
15 #include "uct/internal.h"
19 struct uct_policy
*policy_ucb1_init(struct uct
*u
, char *arg
);
20 struct uct_policy
*policy_ucb1tuned_init(struct uct
*u
, char *arg
);
21 struct uct_policy
*policy_ucb1amaf_init(struct uct
*u
, char *arg
);
24 #define MC_GAMES 40000
25 #define MC_GAMELEN 400
29 progress_status(struct uct
*u
, struct tree
*t
, enum stone color
)
35 struct tree_node
*best
= u
->policy
->choose(u
->policy
, t
->root
, t
->board
, color
);
37 fprintf(stderr
, "... No moves left\n");
40 fprintf(stderr
, "best %f ", best
->u
.value
);
43 fprintf(stderr
, "deepest % 2d ", t
->max_depth
- t
->root
->depth
);
46 fprintf(stderr
, "| seq ");
47 for (int depth
= 0; depth
< 6; depth
++) {
48 if (best
&& best
->u
.playouts
>= 25) {
49 fprintf(stderr
, "%3s ", coord2sstr(best
->coord
, t
->board
));
50 best
= u
->policy
->choose(u
->policy
, best
, t
->board
, color
);
57 fprintf(stderr
, "| can ");
59 struct tree_node
*can
[cans
];
60 memset(can
, 0, sizeof(can
));
61 best
= t
->root
->children
;
64 while ((!can
[c
] || best
->u
.playouts
> can
[c
]->u
.playouts
) && ++c
< cans
);
65 for (int d
= 0; d
< c
; d
++) can
[d
] = can
[d
+ 1];
66 if (c
> 0) can
[c
- 1] = best
;
71 fprintf(stderr
, "%3s(%.3f) ", coord2sstr(can
[cans
]->coord
, t
->board
), can
[cans
]->u
.value
);
77 fprintf(stderr
, "\n");
82 uct_playout(struct uct
*u
, struct board
*b
, enum stone color
, struct tree
*t
)
87 struct playout_amafmap
*amaf
= NULL
;
88 if (u
->policy
->wants_amaf
) {
89 amaf
= calloc(1, sizeof(*amaf
));
90 amaf
->map
= calloc(b2
.size2
+ 1, sizeof(*amaf
->map
));
91 amaf
->map
++; // -1 is pass
94 /* Walk the tree until we find a leaf, then expand it and do
95 * a random playout. */
96 struct tree_node
*n
= t
->root
;
97 enum stone orig_color
= color
;
99 int pass_limit
= (b2
.size
- 2) * (b2
.size
- 2) / 2;
100 int passes
= is_pass(b
->last_move
.coord
);
102 fprintf(stderr
, "--- UCT walk with color %d\n", color
);
103 for (; pass
; color
= stone_other(color
)) {
104 if (tree_leaf_node(n
)) {
105 if (n
->u
.playouts
>= u
->expand_p
)
106 tree_expand_node(t
, n
, &b2
, color
, u
->radar_d
, u
->policy
, (color
== orig_color
? 1 : -1));
108 result
= play_random_game(&b2
, color
, u
->gamelen
, u
->playout_amaf
? amaf
: NULL
, u
->playout
);
109 if (orig_color
!= color
&& result
>= 0)
112 fprintf(stderr
, "[%d..%d] %s random playout result %d\n", orig_color
, color
, coord2sstr(n
->coord
, t
->board
), result
);
114 /* Reset color to the @n color. */
115 color
= stone_other(color
);
119 n
= u
->policy
->descend(u
->policy
, t
, n
, (color
== orig_color
? 1 : -1), pass_limit
);
120 assert(n
== t
->root
|| n
->parent
);
122 fprintf(stderr
, "-- UCT sent us to [%s] %f\n", coord2sstr(n
->coord
, t
->board
), n
->u
.value
);
123 if (amaf
&& n
->coord
>= -1)
124 amaf
->map
[n
->coord
] = color
;
125 struct move m
= { n
->coord
, color
};
126 int res
= board_play(&b2
, &m
);
127 if (res
< 0 || (!is_pass(m
.coord
) && !group_at(&b2
, m
.coord
)) /* suicide */
128 || b2
.superko_violation
) {
130 fprintf(stderr
, "deleting invalid node %d,%d\n", coord_x(n
->coord
,b
), coord_y(n
->coord
,b
));
131 tree_delete_node(t
, n
);
136 if (is_pass(n
->coord
)) {
139 float score
= board_official_score(&b2
);
140 result
= (orig_color
== S_BLACK
) ? score
< 0 : score
> 0;
142 fprintf(stderr
, "[%d..%d] %s p-p scoring playout result %d (W %f)\n", orig_color
, color
, coord2sstr(n
->coord
, t
->board
), result
, score
);
144 board_print(&b2
, stderr
);
152 assert(n
== t
->root
|| n
->parent
);
154 u
->policy
->update(u
->policy
, n
, color
, amaf
, result
);
161 board_done_noalloc(&b2
);
166 uct_genmove(struct engine
*e
, struct board
*b
, enum stone color
)
168 struct uct
*u
= e
->data
;
172 u
->t
= tree_init(b
, color
);
173 //board_print(b, stderr);
175 /* XXX: We hope that the opponent didn't suddenly play
176 * several moves in the row. */
177 for (struct tree_node
*ni
= u
->t
->root
->children
; ni
; ni
= ni
->sibling
)
178 if (ni
->coord
== b
->last_move
.coord
) {
179 tree_promote_node(u
->t
, ni
);
182 fprintf(stderr
, "CANNOT FIND NODE TO PROMOTE!\n");
189 for (i
= 0; i
< u
->games
; i
++) {
190 int result
= uct_playout(u
, b
, color
, u
->t
);
192 /* Tree descent has hit invalid move. */
196 if (i
> 0 && !(i
% 10000)) {
197 progress_status(u
, u
->t
, color
);
200 if (i
> 0 && !(i
% 500)) {
201 struct tree_node
*best
= u
->policy
->choose(u
->policy
, u
->t
->root
, b
, color
);
202 if (best
&& best
->u
.playouts
>= 1000 && best
->u
.value
>= u
->loss_threshold
)
207 progress_status(u
, u
->t
, color
);
209 tree_dump(u
->t
, u
->dumpthres
);
211 struct tree_node
*best
= u
->policy
->choose(u
->policy
, u
->t
->root
, b
, color
);
213 tree_done(u
->t
); u
->t
= NULL
;
214 return coord_copy(pass
);
217 fprintf(stderr
, "*** WINNER is %s (%d,%d) with score %1.4f (%d games)\n", coord2sstr(best
->coord
, b
), coord_x(best
->coord
, b
), coord_y(best
->coord
, b
), best
->u
.value
, u
->t
->root
->u
.playouts
);
218 if (best
->u
.value
< u
->resign_ratio
&& !is_pass(best
->coord
)) {
219 tree_done(u
->t
); u
->t
= NULL
;
220 return coord_copy(resign
);
222 tree_promote_node(u
->t
, best
);
223 return coord_copy(best
->coord
);
228 uct_state_init(char *arg
)
230 struct uct
*u
= calloc(1, sizeof(struct uct
));
234 u
->gamelen
= MC_GAMELEN
;
239 char *optspec
, *next
= arg
;
242 next
+= strcspn(next
, ",");
243 if (*next
) { *next
++ = 0; } else { *next
= 0; }
245 char *optname
= optspec
;
246 char *optval
= strchr(optspec
, '=');
247 if (optval
) *optval
++ = 0;
249 if (!strcasecmp(optname
, "debug")) {
251 u
->debug_level
= atoi(optval
);
254 } else if (!strcasecmp(optname
, "games") && optval
) {
255 u
->games
= atoi(optval
);
256 } else if (!strcasecmp(optname
, "gamelen") && optval
) {
257 u
->gamelen
= atoi(optval
);
258 } else if (!strcasecmp(optname
, "expand_p") && optval
) {
259 u
->expand_p
= atoi(optval
);
260 } else if (!strcasecmp(optname
, "radar_d") && optval
) {
261 /* For 19x19, it is good idea to set this to 3. */
262 u
->radar_d
= atoi(optval
);
263 } else if (!strcasecmp(optname
, "dumpthres") && optval
) {
264 u
->dumpthres
= atoi(optval
);
265 } else if (!strcasecmp(optname
, "playout_amaf")) {
266 /* Whether to include random playout moves in
267 * AMAF as well. (Otherwise, only tree moves
268 * are included in AMAF. Of course makes sense
269 * only in connection with an AMAF policy.) */
270 u
->playout_amaf
= true;
271 } else if (!strcasecmp(optname
, "policy") && optval
) {
272 char *policyarg
= strchr(optval
, ':');
275 if (!strcasecmp(optval
, "ucb1")) {
276 u
->policy
= policy_ucb1_init(u
, policyarg
);
277 } else if (!strcasecmp(optval
, "ucb1tuned")) {
278 u
->policy
= policy_ucb1tuned_init(u
, policyarg
);
279 } else if (!strcasecmp(optval
, "ucb1amaf")) {
280 u
->policy
= policy_ucb1amaf_init(u
, policyarg
);
282 fprintf(stderr
, "UCT: Invalid tree policy %s\n", optval
);
284 } else if (!strcasecmp(optname
, "playout") && optval
) {
285 char *playoutarg
= strchr(optval
, ':');
288 if (!strcasecmp(optval
, "old")) {
289 u
->playout
= playout_old_init(playoutarg
);
290 } else if (!strcasecmp(optval
, "moggy")) {
291 u
->playout
= playout_moggy_init(playoutarg
);
292 } else if (!strcasecmp(optval
, "light")) {
293 u
->playout
= playout_light_init(playoutarg
);
295 fprintf(stderr
, "UCT: Invalid playout policy %s\n", optval
);
298 fprintf(stderr
, "uct: Invalid engine argument %s or missing value\n", optname
);
303 u
->resign_ratio
= 0.2; /* Resign when most games are lost. */
304 u
->loss_threshold
= 0.95; /* Stop reading if after at least 500 playouts this is best value. */
306 u
->policy
= policy_ucb1_init(u
, NULL
);
309 u
->playout
= playout_light_init(NULL
);
310 u
->playout
->debug_level
= u
->debug_level
;
317 engine_uct_init(char *arg
)
319 struct uct
*u
= uct_state_init(arg
);
320 struct engine
*e
= calloc(1, sizeof(struct engine
));
321 e
->name
= "UCT Engine";
322 e
->comment
= "I'm playing UCT. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
323 e
->genmove
= uct_genmove
;