8 #include "joseki/base.h"
10 #include "playout/elo.h"
11 #include "playout/moggy.h"
12 #include "playout/light.h"
13 #include "montecarlo/internal.h"
14 #include "montecarlo/montecarlo.h"
19 /* This is simple monte-carlo engine. It plays MC_GAMES random games from the
20 * current board and records win/loss ratio for each first move. The move with
21 * the biggest number of winning games gets played. */
22 /* Note that while the library is based on New Zealand rules, this engine
23 * returns moves according to Chinese rules. Thus, it does not return suicide
24 * moves. It of course respects positional superko too. */
26 /* Pass me arguments like a=b,c=d,...
27 * Supported arguments:
28 * debug[=DEBUG_LEVEL] 1 is the default; more means more debugging prints
29 * games=MC_GAMES number of random games to play
30 * gamelen=MC_GAMELEN maximal length of played random game
31 * playout={light,moggy,elo}[:playout_params]
35 #define MC_GAMES 40000
36 #define MC_GAMELEN 400
39 /* FIXME: Cutoff rule for simulations. Currently we are so fast that this
40 * simply does not matter; even 100000 simulations are fast enough to
41 * play 5 minutes S.D. on 19x19 and anything more sounds too ridiculous
43 /* FIXME: We cannot handle seki. Any good ideas are welcome. A possibility is
44 * to consider 'pass' among the moves, but this seems tricky. */
48 board_stats_print(struct board
*board
, struct move_stat
*moves
, FILE *f
)
52 char asdf
[] = "ABCDEFGHJKLMNOPQRSTUVWXYZ";
53 for (x
= 1; x
< board_size(board
) - 1; x
++)
54 fprintf(f
, "%c ", asdf
[x
- 1]);
56 for (x
= 1; x
< board_size(board
) - 1; x
++)
59 for (y
= board_size(board
) - 2; y
>= 1; y
--) {
60 fprintf(f
, "%2d | ", y
);
61 for (x
= 1; x
< board_size(board
) - 1; x
++)
62 if (moves
[y
* board_size(board
) + x
].games
)
63 fprintf(f
, "%0.2f ", (float) moves
[y
* board_size(board
) + x
].wins
/ moves
[y
* board_size(board
) + x
].games
);
67 for (x
= 1; x
< board_size(board
) - 1; x
++)
68 fprintf(f
, "%4d ", moves
[y
* board_size(board
) + x
].games
);
72 for (x
= 1; x
< board_size(board
) - 1; x
++)
79 montecarlo_genmove(struct engine
*e
, struct board
*b
, struct time_info
*ti
, enum stone color
, bool pass_all_alive
)
81 struct montecarlo
*mc
= e
->data
;
83 if (ti
->dim
== TD_WALLTIME
) {
84 fprintf(stderr
, "Warning: TD_WALLTIME time mode not supported, resetting to defaults.\n");
87 if (ti
->period
== TT_NULL
) {
90 ti
->len
.games
= MC_GAMES
;
92 struct time_stop stop
;
93 time_stop_conditions(ti
, b
, 20, 40, &stop
);
95 /* resign when the hope for win vanishes */
96 coord_t top_coord
= resign
;
97 float top_ratio
= mc
->resign_ratio
;
99 /* We use [0] for pass. Normally, this is an inaccessible corner
100 * of board margin. */
101 struct move_stat moves
[board_size2(b
)];
102 memset(moves
, 0, sizeof(moves
));
105 int i
, superko
= 0, good_games
= 0;
106 for (i
= 0; i
< stop
.desired
.playouts
; i
++) {
107 assert(!b
->superko_violation
);
113 board_play_random(&b2
, color
, &coord
, NULL
, NULL
);
114 if (!is_pass(coord
) && !group_at(&b2
, coord
)) {
115 /* Multi-stone suicide. We play chinese rules,
116 * so we can't consider this. (Note that we
117 * unfortunately still consider this in playouts.) */
119 fprintf(stderr
, "SUICIDE DETECTED at %d,%d:\n", coord_x(coord
, b
), coord_y(coord
, b
));
120 board_print(b
, stderr
);
126 fprintf(stderr
, "[%d,%d color %d] playing random game\n", coord_x(coord
, b
), coord_y(coord
, b
), color
);
128 struct playout_setup ps
= { .gamelen
= mc
->gamelen
};
129 int result
= play_random_game(&ps
, &b2
, color
, NULL
, NULL
, mc
->playout
);
131 board_done_noalloc(&b2
);
134 /* Superko. We just ignore this playout.
136 if (unlikely(superko
> 2 * stop
.desired
.playouts
)) {
137 /* Uhh. Triple ko, or something? */
139 fprintf(stderr
, "SUPERKO LOOP. I will pass. Did we hit triple ko?\n");
142 /* This playout didn't count; we should not
143 * disadvantage moves that lead to a superko.
144 * And it is supposed to be rare. */
150 fprintf(stderr
, "\tresult for other player: %d\n", result
);
152 int pos
= is_pass(coord
) ? 0 : coord
;
157 losses
+= result
> 0;
158 moves
[pos
].wins
+= 1 - (result
> 0);
160 if (unlikely(!losses
&& i
== mc
->loss_threshold
)) {
161 /* We played out many games and didn't lose once yet.
162 * This game is over. */
168 /* No moves to try??? */
170 fprintf(stderr
, "OUT OF MOVES! I will pass. But how did this happen?\n");
171 board_print(b
, stderr
);
174 top_coord
= pass
; top_ratio
= 0.5;
180 /* Simple heuristic: avoid opening too low. Do not
181 * play on second or first line as first white or
182 * first two black moves.*/
183 if (coord_x(c
, b
) < 3 || coord_x(c
, b
) > board_size(b
) - 4
184 || coord_y(c
, b
) < 3 || coord_y(c
, b
) > board_size(b
) - 4)
188 float ratio
= (float) moves
[c
].wins
/ moves
[c
].games
;
189 /* Since pass is [0,0], we will pass only when we have nothing
191 if (ratio
>= top_ratio
) {
193 top_coord
= c
== 0 ? pass
: c
;
198 board_stats_print(b
, moves
, stderr
);
203 fprintf(stderr
, "*** WINNER is %d,%d with score %1.4f (%d games, %d superko)\n", coord_x(top_coord
, b
), coord_y(top_coord
, b
), top_ratio
, i
, superko
);
205 return coord_copy(top_coord
);
210 montecarlo_state_init(char *arg
, struct board
*b
)
212 struct montecarlo
*mc
= calloc2(1, sizeof(struct montecarlo
));
215 mc
->gamelen
= MC_GAMELEN
;
218 char *optspec
, *next
= arg
;
221 next
+= strcspn(next
, ",");
222 if (*next
) { *next
++ = 0; } else { *next
= 0; }
224 char *optname
= optspec
;
225 char *optval
= strchr(optspec
, '=');
226 if (optval
) *optval
++ = 0;
228 if (!strcasecmp(optname
, "debug")) {
230 mc
->debug_level
= atoi(optval
);
233 } else if (!strcasecmp(optname
, "gamelen") && optval
) {
234 mc
->gamelen
= atoi(optval
);
235 } else if (!strcasecmp(optname
, "playout") && optval
) {
236 char *playoutarg
= strchr(optval
, ':');
239 if (!strcasecmp(optval
, "moggy")) {
240 mc
->playout
= playout_moggy_init(playoutarg
, b
, joseki_load(b
->size
));
241 } else if (!strcasecmp(optval
, "light")) {
242 mc
->playout
= playout_light_init(playoutarg
, b
);
243 } else if (!strcasecmp(optval
, "elo")) {
244 mc
->playout
= playout_elo_init(playoutarg
, b
);
246 fprintf(stderr
, "MonteCarlo: Invalid playout policy %s\n", optval
);
249 fprintf(stderr
, "MonteCarlo: Invalid engine argument %s or missing value\n", optname
);
255 mc
->playout
= playout_light_init(NULL
, b
);
256 mc
->playout
->debug_level
= mc
->debug_level
;
258 mc
->resign_ratio
= 0.1; /* Resign when most games are lost. */
259 mc
->loss_threshold
= 5000; /* Stop reading if no loss encountered in first 5000 games. */
266 engine_montecarlo_init(char *arg
, struct board
*b
)
268 struct montecarlo
*mc
= montecarlo_state_init(arg
, b
);
269 struct engine
*e
= calloc2(1, sizeof(struct engine
));
270 e
->name
= "MonteCarlo Engine";
271 e
->comment
= "I'm playing in Monte Carlo. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
272 e
->genmove
= montecarlo_genmove
;