11 #include "montecarlo/hint.h"
12 #include "montecarlo/internal.h"
18 #define MC_GAMES 40000
19 #define MC_GAMELEN 400
22 /* Internal engine state. */
35 #define UDEBUGL(n) DEBUGL_(u->debug_level, n)
39 domainhint_policy(void *playout_policy
, struct board
*b
, enum stone my_color
)
41 struct uct
*u
= playout_policy
;
42 return domain_hint(&u
->mc
, b
, my_color
);
46 uct_playout(struct uct
*u
, struct board
*b
, enum stone color
, struct tree
*t
)
51 /* Walk the tree until we find a leaf, then expand it and do
52 * a random playout. */
53 struct tree_node
*n
= t
->root
;
54 enum stone orig_color
= color
;
58 fprintf(stderr
, "--- UCT walk\n");
59 for (; pass
; color
= stone_other(color
)) {
60 if (tree_leaf_node(n
)) {
61 if (n
->playouts
>= u
->expand_p
)
62 tree_expand_node(t
, n
, &b2
);
64 struct move m
= { n
->coord
, color
};
65 result
= play_random_game(&b2
, &m
, u
->gamelen
, domainhint_policy
, u
);
66 if (orig_color
!= color
&& result
>= 0)
69 fprintf(stderr
, "[%d..%d] %s playout result %d\n", orig_color
, color
, coord2sstr(n
->coord
, t
->board
), result
);
73 n
= tree_uct_descend(t
, n
, (color
== orig_color
? 1 : -1), b2
.moves
> (b2
.size2
- 2) / 2);
75 fprintf(stderr
, "-- UCT sent us to [%s] %f\n", coord2sstr(n
->coord
, t
->board
), n
->value
);
76 struct move m
= { n
->coord
, color
};
77 int res
= board_play(&b2
, &m
);
78 if (res
< 0 || (!is_pass(m
.coord
) && !group_at(&b2
, m
.coord
)) /* suicide */) {
80 fprintf(stderr
, "deleting invalid node %d,%d\n", coord_x(n
->coord
,b
), coord_y(n
->coord
,b
));
82 board_done_noalloc(&b2
);
86 if (is_pass(n
->coord
)) {
89 float score
= board_fast_score(&b2
) > 0;
90 result
= (orig_color
== S_BLACK
) ? score
< 0 : score
> 0;
92 fprintf(stderr
, "[%d..%d] %s playout result %d (W %f)\n", orig_color
, color
, coord2sstr(n
->coord
, t
->board
), result
, score
);
94 board_print(&b2
, stderr
);
103 tree_uct_update(n
, result
);
104 board_done_noalloc(&b2
);
109 uct_genmove(struct engine
*e
, struct board
*b
, enum stone color
)
111 struct uct
*u
= e
->data
;
116 u
->t
->explore_p
= u
->explore_p
;
118 /* XXX: We hope that the opponent didn't suddenly play
119 * several moves in the row. */
120 for (struct tree_node
*ni
= u
->t
->root
->children
; ni
; ni
= ni
->sibling
)
121 if (ni
->coord
== b
->last_move
.coord
) {
122 tree_promote_node(u
->t
, ni
);
125 fprintf(stderr
, "CANNOT FIND NODE TO PROMOTE!\n");
132 for (i
= 0; i
< u
->games
; i
++) {
133 int result
= uct_playout(u
, b
, color
, u
->t
);
135 /* Tree descent has hit invalid move. */
139 if (i
> 0 && !(i
% 1000)) {
140 struct tree_node
*best
= tree_best_child(u
->t
->root
);
141 if (best
&& best
->playouts
>= 100 && best
->value
>= u
->loss_threshold
)
149 struct tree_node
*best
= tree_best_child(u
->t
->root
);
151 tree_done(u
->t
); u
->t
= NULL
;
152 return coord_copy(pass
);
155 fprintf(stderr
, "*** WINNER is %d,%d with score %1.4f (%d games)\n", coord_x(best
->coord
, b
), coord_y(best
->coord
, b
), best
->value
, i
);
156 if (best
->value
< u
->resign_ratio
&& !is_pass(best
->coord
)) {
157 tree_done(u
->t
); u
->t
= NULL
;
158 return coord_copy(resign
);
160 tree_promote_node(u
->t
, best
);
161 return coord_copy(best
->coord
);
166 uct_state_init(char *arg
)
168 struct uct
*u
= calloc(1, sizeof(struct uct
));
172 u
->gamelen
= MC_GAMELEN
;
175 u
->mc
.capture_rate
= 100;
176 u
->mc
.atari_rate
= 100;
178 // Looking at the actual playouts, this just encourages MC to make
180 u
->mc
.local_rate
= 0;
183 char *optspec
, *next
= arg
;
186 next
+= strcspn(next
, ",");
187 if (*next
) { *next
++ = 0; } else { *next
= 0; }
189 char *optname
= optspec
;
190 char *optval
= strchr(optspec
, '=');
191 if (optval
) *optval
++ = 0;
193 if (!strcasecmp(optname
, "debug")) {
195 u
->debug_level
= atoi(optval
);
198 } else if (!strcasecmp(optname
, "games") && optval
) {
199 u
->games
= atoi(optval
);
200 } else if (!strcasecmp(optname
, "gamelen") && optval
) {
201 u
->gamelen
= atoi(optval
);
202 } else if (!strcasecmp(optname
, "explore_p") && optval
) {
203 u
->explore_p
= atof(optval
);
204 } else if (!strcasecmp(optname
, "expand_p") && optval
) {
205 u
->expand_p
= atoi(optval
);
206 } else if (!strcasecmp(optname
, "pure")) {
207 u
->mc
.capture_rate
= u
->mc
.local_rate
= u
->mc
.cut_rate
= 0;
208 } else if (!strcasecmp(optname
, "capturerate") && optval
) {
209 u
->mc
.capture_rate
= atoi(optval
);
210 } else if (!strcasecmp(optname
, "atarirate") && optval
) {
211 u
->mc
.atari_rate
= atoi(optval
);
212 } else if (!strcasecmp(optname
, "localrate") && optval
) {
213 u
->mc
.local_rate
= atoi(optval
);
214 } else if (!strcasecmp(optname
, "cutrate") && optval
) {
215 u
->mc
.cut_rate
= atoi(optval
);
217 fprintf(stderr
, "uct: Invalid engine argument %s or missing value\n", optname
);
222 u
->resign_ratio
= 0.2; /* Resign when most games are lost. */
223 u
->loss_threshold
= 0.9; /* Stop reading if after at least 1000 games this is best value. */
224 u
->mc
.debug_level
= u
->debug_level
;
231 engine_uct_init(char *arg
)
233 struct uct
*u
= uct_state_init(arg
);
234 struct engine
*e
= calloc(1, sizeof(struct engine
));
235 e
->name
= "UCT Engine";
236 e
->comment
= "I'm playing UCT. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
237 e
->genmove
= uct_genmove
;