8 #include "montecarlo/hint.h"
9 #include "montecarlo/internal.h"
10 #include "montecarlo/montecarlo.h"
13 /* This is simple monte-carlo engine. It plays MC_GAMES random games from the
14 * current board and records win/loss ratio for each first move. The move with
15 * the biggest number of winning games gets played. */
16 /* Note that while the library is based on New Zealand rules, this engine
17 * returns moves according to Chinese rules. Thus, it does not return suicide
18 * moves. It of course respects positional superko too. */
20 /* Pass me arguments like a=b,c=d,...
21 * Supported arguments:
22 * debug[=DEBUG_LEVEL] 1 is the default; more means more debugging prints
23 * games=MC_GAMES number of random games to play
24 * gamelen=MC_GAMELEN maximal length of played random game
26 * The following arguments tune domain-specific heuristics. They tend to carry
27 * very high performance penalty.
28 * pure turns all the heuristics off; you can then turn
30 * atari_rate=MC_ATARIRATE how many of 100 moves should be non-random but
31 * fix local atari, if there is any
32 * local_rate=MC_LOCALRATE how many of 100 moves should be contact plays
34 * cut_rate=MC_CUTRATE how many of 100 moves should fix local cuts,
38 /* Times for 10000 runs on 1.6GHz Athlon. pure runs at ~500ms */
40 #define MC_GAMES 40000
41 #define MC_GAMELEN 400
42 #define MC_ATARIRATE 50 /* +200ms */
43 #define MC_CUTRATE 40 /* +100ms */
44 #define MC_LOCALRATE 30 /* +100ms */
47 /* FIXME: Cutoff rule for simulations. Currently we are so fast that this
48 * simply does not matter; even 100000 simulations are fast enough to
49 * play 5 minutes S.D. on 19x19 and anything more sounds too ridiculous
51 /* FIXME: We cannot handle seki. Any good ideas are welcome. A possibility is
52 * to consider 'pass' among the moves, but this seems tricky. */
56 board_stats_print(struct board
*board
, struct move_stat
*moves
, FILE *f
)
60 char asdf
[] = "ABCDEFGHJKLMNOPQRSTUVWXYZ";
61 for (x
= 1; x
< board
->size
- 1; x
++)
62 fprintf(f
, "%c ", asdf
[x
- 1]);
64 for (x
= 1; x
< board
->size
- 1; x
++)
67 for (y
= board
->size
- 2; y
>= 1; y
--) {
68 fprintf(f
, "%2d | ", y
);
69 for (x
= 1; x
< board
->size
- 1; x
++)
70 if (moves
[y
* board
->size
+ x
].games
)
71 fprintf(f
, "%0.2f ", (float) moves
[y
* board
->size
+ x
].wins
/ moves
[y
* board
->size
+ x
].games
);
75 for (x
= 1; x
< board
->size
- 1; x
++)
76 fprintf(f
, "%4d ", moves
[y
* board
->size
+ x
].games
);
80 for (x
= 1; x
< board
->size
- 1; x
++)
86 /* 1: m->color wins, 0: m->color loses
87 * -1 superko at the root
88 * -2 superko inside the game tree (NOT at root, that's simply invalid move)
89 * -3 first move is multi-stone suicide */
91 play_random_game(struct montecarlo
*mc
, struct board
*b
, struct move
*m
, int i
)
93 if (b
->superko_violation
) {
95 fprintf(stderr
, "\tILLEGAL: superko violation at root!\n");
96 board_print(b
, stderr
);
104 board_play_random(&b2
, m
->color
, &m
->coord
);
105 if (!is_pass(m
->coord
) && !group_at(&b2
, m
->coord
)) {
107 fprintf(stderr
, "SUICIDE DETECTED at %d,%d:\n", coord_x(m
->coord
), coord_y(m
->coord
));
108 board_print(&b2
, stderr
);
110 board_done_noalloc(&b2
);
115 fprintf(stderr
, "[%d,%d] playing random game\n", coord_x(m
->coord
), coord_y(m
->coord
));
117 int gamelen
= mc
->gamelen
- b2
.moves
;
121 enum stone color
= stone_other(m
->color
);
124 int passes
= is_pass(m
->coord
);
126 /* Special check: We probably tenukied the last opponent's move. But
127 * check if the opponent has lucrative local continuation for her last
129 /* This check is ultra-important BTW. Without it domain checking does
130 * not bring that much of an advantage. It might even warrant it to by
131 * default do only this domain check. */
133 domain_hint(mc
, b
, &urgent
, m
->color
);
134 if (!is_pass(urgent
))
137 while (gamelen
-- && passes
< 2) {
139 domain_hint(mc
, &b2
, &urgent
, m
->color
);
143 if (!is_pass(urgent
)) {
146 m
.coord
= urgent
; m
.color
= color
;
147 if (board_play(&b2
, &m
) < 0) {
149 fprintf(stderr
, "Urgent move %d,%d is ILLEGAL:\n", coord_x(urgent
), coord_y(urgent
));
150 board_print(&b2
, stderr
);
157 board_play_random(&b2
, color
, &coord
);
160 if (unlikely(b2
.superko_violation
)) {
161 /* We ignore superko violations that are suicides. These
162 * are common only at the end of the game and are
163 * rather harmless. (They will not go through as a root
165 if (group_at(&b2
, coord
)) {
167 fprintf(stderr
, "Superko fun at %d,%d in\n", coord_x(coord
), coord_y(coord
));
169 board_print(&b2
, stderr
);
171 board_done_noalloc(&b2
);
175 fprintf(stderr
, "Ignoring superko at %d,%d in\n", coord_x(coord
), coord_y(coord
));
176 board_print(&b2
, stderr
);
178 b2
.superko_violation
= false;
183 char *cs
= coord2str(coord
);
184 fprintf(stderr
, "%s %s\n", stone2str(color
), cs
);
188 if (unlikely(is_pass(coord
))) {
194 color
= stone_other(color
);
197 if (MCDEBUGL(6 - !(i
% (mc
->games
/2))))
198 board_print(&b2
, stderr
);
200 float score
= board_fast_score(&b2
);
201 if (MCDEBUGL(5 - !(i
% (mc
->games
/2))))
202 fprintf(stderr
, "--- game result: %f\n", score
);
204 board_done_noalloc(&b2
);
205 return (m
->color
== S_WHITE
? (score
> 0 ? 1 : 0) : (score
< 0 ? 1 : 0));
210 montecarlo_genmove(struct engine
*e
, struct board
*b
, enum stone color
)
212 struct montecarlo
*mc
= e
->data
;
216 /* resign when the hope for win vanishes */
217 coord_t top_coord
= resign
;
218 float top_ratio
= mc
->resign_ratio
;
220 /* We use [0] for pass. Normally, this is an inaccessible corner
221 * of board margin. */
222 struct move_stat moves
[b
->size2
];
223 memset(moves
, 0, sizeof(moves
));
226 int i
, superko
= 0, good_games
= 0;
227 for (i
= 0; i
< mc
->games
; i
++) {
228 int result
= play_random_game(mc
, b
, &m
, i
);
231 fprintf(stderr
, "\tresult %d\n", result
);
235 /* Uh. Oops? Er... */
236 top_coord
= pass
; top_ratio
= 0.5;
240 /* Superko. We just ignore this playout.
242 if (unlikely(superko
> 2 * mc
->games
)) {
243 /* Uhh. Triple ko, or something? */
245 fprintf(stderr
, "SUPERKO LOOP. I will pass. Did we hit triple ko?\n");
248 /* This playout didn't count; we should not
249 * disadvantage moves that lead to a superko.
250 * And it is supposed to be rare. */
255 /* Multi-stone suicide. We play chinese rules,
256 * so we can't consider this. (Note that we
257 * unfortunately still consider this in playouts.) */
261 int pos
= is_pass(m
.coord
) ? 0 : m
.coord
.pos
;
267 /* Simple heuristic: avoid opening too low. Do not
268 * play on second or first line as first white or
269 * first two black moves.*/
270 if (coord_x(m
.coord
) < 3 || coord_x(m
.coord
) > b
->size
- 4
271 || coord_y(m
.coord
) < 3 || coord_y(m
.coord
) > b
->size
- 4)
275 losses
+= 1 - result
;
276 moves
[pos
].wins
+= result
;
278 if (unlikely(!losses
&& i
== mc
->loss_threshold
)) {
279 /* We played out many games and didn't lose once yet.
280 * This game is over. */
286 /* No moves to try??? */
288 fprintf(stderr
, "OUT OF MOVES! I will pass. But how did this happen?\n");
289 board_print(b
, stderr
);
295 float ratio
= (float) moves
[c
.pos
].wins
/ moves
[c
.pos
].games
;
296 /* Since pass is [0,0], we will pass only when we have nothing
298 if (ratio
>= top_ratio
) {
300 top_coord
= c
.pos
== 0 ? pass
: c
;
305 board_stats_print(b
, moves
, stderr
);
310 fprintf(stderr
, "*** WINNER is %d,%d with score %1.4f (%d games, %d superko)\n", coord_x(top_coord
), coord_y(top_coord
), top_ratio
, i
, superko
);
312 return coord_copy(top_coord
);
317 montecarlo_state_init(char *arg
)
319 struct montecarlo
*mc
= calloc(1, sizeof(struct montecarlo
));
321 mc
->last_hint
= pass
;
324 mc
->games
= MC_GAMES
;
325 mc
->gamelen
= MC_GAMELEN
;
326 mc
->atari_rate
= MC_ATARIRATE
;
327 mc
->local_rate
= MC_LOCALRATE
;
328 mc
->cut_rate
= MC_CUTRATE
;
331 char *optspec
, *next
= arg
;
334 next
+= strcspn(next
, ",");
335 if (*next
) { *next
++ = 0; } else { *next
= 0; }
337 char *optname
= optspec
;
338 char *optval
= strchr(optspec
, '=');
339 if (optval
) *optval
++ = 0;
341 if (!strcasecmp(optname
, "debug")) {
343 mc
->debug_level
= atoi(optval
);
346 } else if (!strcasecmp(optname
, "games") && optval
) {
347 mc
->games
= atoi(optval
);
348 } else if (!strcasecmp(optname
, "gamelen") && optval
) {
349 mc
->gamelen
= atoi(optval
);
350 } else if (!strcasecmp(optname
, "pure")) {
351 mc
->atari_rate
= mc
->local_rate
= mc
->cut_rate
= 0;
352 } else if (!strcasecmp(optname
, "atarirate") && optval
) {
353 mc
->atari_rate
= atoi(optval
);
354 } else if (!strcasecmp(optname
, "localrate") && optval
) {
355 mc
->local_rate
= atoi(optval
);
356 } else if (!strcasecmp(optname
, "cutrate") && optval
) {
357 mc
->cut_rate
= atoi(optval
);
359 fprintf(stderr
, "MonteCarlo: Invalid engine argument %s or missing value\n", optname
);
364 mc
->resign_ratio
= 0.1; /* Resign when most games are lost. */
365 mc
->loss_threshold
= mc
->games
/ 10; /* Stop reading if no loss encountered in first n games. */
372 engine_montecarlo_init(char *arg
)
374 struct montecarlo
*mc
= montecarlo_state_init(arg
);
375 struct engine
*e
= calloc(1, sizeof(struct engine
));
376 e
->name
= "MonteCarlo Engine";
377 e
->comment
= "I'm playing in Monte Carlo. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
378 e
->genmove
= montecarlo_genmove
;