Introduce DEBUGL and MCDEBUGL for debug_level tests
[pachi.git] / montecarlo / montecarlo.c
blob382725a16353181ded6e7a3a824e7cb52ee63d82
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
5 #include "board.h"
6 #include "engine.h"
7 #include "move.h"
8 #include "montecarlo/hint.h"
9 #include "montecarlo/internal.h"
10 #include "montecarlo/montecarlo.h"
13 /* This is simple monte-carlo engine. It plays MC_GAMES random games from the
14 * current board and records win/loss ratio for each first move. The move with
15 * the biggest number of winning games gets played. */
16 /* Note that while the library is based on New Zealand rules, this engine
17 * returns moves according to Chinese rules. Thus, it does not return suicide
18 * moves. It of course respects positional superko too. */
20 /* Pass me arguments like a=b,c=d,...
21 * Supported arguments:
22 * debug[=DEBUG_LEVEL] 1 is the default; more means more debugging prints
23 * games=MC_GAMES number of random games to play
24 * gamelen=MC_GAMELEN maximal length of played random game
26 * The following arguments tune domain-specific heuristics. They tend to carry
27 * very high performance penalty.
28 * pure turns all the heuristics off; you can then turn
29 * them on selectively
30 * atari_rate=MC_ATARIRATE how many of 100 moves should be non-random but
31 * fix local atari, if there is any
32 * local_rate=MC_LOCALRATE how many of 100 moves should be contact plays
33 * (tsuke or diagonal)
34 * cut_rate=MC_CUTRATE how many of 100 moves should fix local cuts,
35 * if there are any */
38 /* Times for 10000 runs on 1.6GHz Athlon. pure runs at ~500ms */
40 #define MC_GAMES 40000
41 #define MC_GAMELEN 400
42 #define MC_ATARIRATE 50 /* +200ms */
43 #define MC_CUTRATE 40 /* +100ms */
44 #define MC_LOCALRATE 30 /* +100ms */
47 /* FIXME: Cutoff rule for simulations. Currently we are so fast that this
48 * simply does not matter; even 100000 simulations are fast enough to
49 * play 5 minutes S.D. on 19x19 and anything more sounds too ridiculous
50 * already. */
51 /* FIXME: We cannot handle seki. Any good ideas are welcome. A possibility is
52 * to consider 'pass' among the moves, but this seems tricky. */
55 void
56 board_stats_print(struct board *board, struct move_stat *moves, FILE *f)
58 fprintf(f, "\n ");
59 int x, y;
60 char asdf[] = "ABCDEFGHJKLMNOPQRSTUVWXYZ";
61 for (x = 1; x < board->size - 1; x++)
62 fprintf(f, "%c ", asdf[x - 1]);
63 fprintf(f, "\n +-");
64 for (x = 1; x < board->size - 1; x++)
65 fprintf(f, "-----");
66 fprintf(f, "+\n");
67 for (y = board->size - 2; y >= 1; y--) {
68 fprintf(f, "%2d | ", y);
69 for (x = 1; x < board->size - 1; x++)
70 if (moves[y * board->size + x].games)
71 fprintf(f, "%0.2f ", (float) moves[y * board->size + x].wins / moves[y * board->size + x].games);
72 else
73 fprintf(f, "---- ");
74 fprintf(f, "| ");
75 for (x = 1; x < board->size - 1; x++)
76 fprintf(f, "%4d ", moves[y * board->size + x].games);
77 fprintf(f, "|\n");
79 fprintf(f, " +-");
80 for (x = 1; x < board->size - 1; x++)
81 fprintf(f, "-----");
82 fprintf(f, "+\n");
86 /* 1: m->color wins, 0: m->color loses
87 * -1 superko at the root
88 * -2 superko inside the game tree (NOT at root, that's simply invalid move)
89 * -3 first move is multi-stone suicide */
90 static int
91 play_random_game(struct montecarlo *mc, struct board *b, struct move *m, int i)
93 if (b->superko_violation) {
94 if (MCDEBUGL(0)) {
95 fprintf(stderr, "\tILLEGAL: superko violation at root!\n");
96 board_print(b, stderr);
98 return -1;
101 struct board b2;
102 board_copy(&b2, b);
104 board_play_random(&b2, m->color, &m->coord);
105 if (!is_pass(m->coord) && !group_at(&b2, m->coord)) {
106 if (MCDEBUGL(4)) {
107 fprintf(stderr, "SUICIDE DETECTED at %d,%d:\n", coord_x(m->coord), coord_y(m->coord));
108 board_print(&b2, stderr);
110 board_done_noalloc(&b2);
111 return -3;
114 if (MCDEBUGL(3))
115 fprintf(stderr, "[%d,%d] playing random game\n", coord_x(m->coord), coord_y(m->coord));
117 int gamelen = mc->gamelen - b2.moves;
118 if (gamelen < 10)
119 gamelen = 10;
121 enum stone color = stone_other(m->color);
122 coord_t urgent;
124 int passes = is_pass(m->coord);
126 /* Special check: We probably tenukied the last opponent's move. But
127 * check if the opponent has lucrative local continuation for her last
128 * move! */
129 /* This check is ultra-important BTW. Without it domain checking does
130 * not bring that much of an advantage. It might even warrant it to by
131 * default do only this domain check. */
132 urgent = pass;
133 domain_hint(mc, b, &urgent, m->color);
134 if (!is_pass(urgent))
135 goto play_urgent;
137 while (gamelen-- && passes < 2) {
138 urgent = pass;
139 domain_hint(mc, &b2, &urgent, m->color);
141 coord_t coord;
143 if (!is_pass(urgent)) {
144 struct move m;
145 play_urgent:
146 m.coord = urgent; m.color = color;
147 if (board_play(&b2, &m) < 0) {
148 if (MCDEBUGL(7)) {
149 fprintf(stderr, "Urgent move %d,%d is ILLEGAL:\n", coord_x(urgent), coord_y(urgent));
150 board_print(&b2, stderr);
152 goto play_random;
154 coord = urgent;
155 } else {
156 play_random:
157 board_play_random(&b2, color, &coord);
160 if (unlikely(b2.superko_violation)) {
161 /* We ignore superko violations that are suicides. These
162 * are common only at the end of the game and are
163 * rather harmless. (They will not go through as a root
164 * move anyway.) */
165 if (group_at(&b2, coord)) {
166 if (MCDEBUGL(3)) {
167 fprintf(stderr, "Superko fun at %d,%d in\n", coord_x(coord), coord_y(coord));
168 if (MCDEBUGL(4))
169 board_print(&b2, stderr);
171 board_done_noalloc(&b2);
172 return -2;
173 } else {
174 if (MCDEBUGL(6)) {
175 fprintf(stderr, "Ignoring superko at %d,%d in\n", coord_x(coord), coord_y(coord));
176 board_print(&b2, stderr);
178 b2.superko_violation = false;
182 if (MCDEBUGL(7)) {
183 char *cs = coord2str(coord);
184 fprintf(stderr, "%s %s\n", stone2str(color), cs);
185 free(cs);
188 if (unlikely(is_pass(coord))) {
189 passes++;
190 } else {
191 passes = 0;
194 color = stone_other(color);
197 if (MCDEBUGL(6 - !(i % (mc->games/2))))
198 board_print(&b2, stderr);
200 float score = board_fast_score(&b2);
201 if (MCDEBUGL(5 - !(i % (mc->games/2))))
202 fprintf(stderr, "--- game result: %f\n", score);
204 board_done_noalloc(&b2);
205 return (m->color == S_WHITE ? (score > 0 ? 1 : 0) : (score < 0 ? 1 : 0));
209 static coord_t *
210 montecarlo_genmove(struct engine *e, struct board *b, enum stone color)
212 struct montecarlo *mc = e->data;
213 struct move m;
214 m.color = color;
216 /* resign when the hope for win vanishes */
217 coord_t top_coord = resign;
218 float top_ratio = mc->resign_ratio;
220 /* We use [0] for pass. Normally, this is an inaccessible corner
221 * of board margin. */
222 struct move_stat moves[b->size2];
223 memset(moves, 0, sizeof(moves));
225 int losses = 0;
226 int i, superko = 0, good_games = 0;
227 for (i = 0; i < mc->games; i++) {
228 int result = play_random_game(mc, b, &m, i);
230 if (MCDEBUGL(3))
231 fprintf(stderr, "\tresult %d\n", result);
233 if (result == -1) {
234 pass_wins:
235 /* Uh. Oops? Er... */
236 top_coord = pass; top_ratio = 0.5;
237 goto move_found;
239 if (result == -2) {
240 /* Superko. We just ignore this playout.
241 * And play again. */
242 if (unlikely(superko > 2 * mc->games)) {
243 /* Uhh. Triple ko, or something? */
244 if (MCDEBUGL(0))
245 fprintf(stderr, "SUPERKO LOOP. I will pass. Did we hit triple ko?\n");
246 goto pass_wins;
248 /* This playout didn't count; we should not
249 * disadvantage moves that lead to a superko.
250 * And it is supposed to be rare. */
251 i--, superko++;
252 continue;
254 if (result == -3) {
255 /* Multi-stone suicide. We play chinese rules,
256 * so we can't consider this. (Note that we
257 * unfortunately still consider this in playouts.) */
258 continue;
261 int pos = is_pass(m.coord) ? 0 : m.coord.pos;
263 good_games++;
264 moves[pos].games++;
266 if (b->moves < 3) {
267 /* Simple heuristic: avoid opening too low. Do not
268 * play on second or first line as first white or
269 * first two black moves.*/
270 if (coord_x(m.coord) < 3 || coord_x(m.coord) > b->size - 4
271 || coord_y(m.coord) < 3 || coord_y(m.coord) > b->size - 4)
272 continue;
275 losses += 1 - result;
276 moves[pos].wins += result;
278 if (unlikely(!losses && i == mc->loss_threshold)) {
279 /* We played out many games and didn't lose once yet.
280 * This game is over. */
281 break;
285 if (!good_games) {
286 /* No moves to try??? */
287 if (MCDEBUGL(0)) {
288 fprintf(stderr, "OUT OF MOVES! I will pass. But how did this happen?\n");
289 board_print(b, stderr);
291 goto pass_wins;
294 foreach_point(b) {
295 float ratio = (float) moves[c.pos].wins / moves[c.pos].games;
296 /* Since pass is [0,0], we will pass only when we have nothing
297 * better to do. */
298 if (ratio >= top_ratio) {
299 top_ratio = ratio;
300 top_coord = c.pos == 0 ? pass : c;
302 } foreach_point_end;
304 if (MCDEBUGL(2)) {
305 board_stats_print(b, moves, stderr);
308 move_found:
309 if (MCDEBUGL(1))
310 fprintf(stderr, "*** WINNER is %d,%d with score %1.4f (%d games, %d superko)\n", coord_x(top_coord), coord_y(top_coord), top_ratio, i, superko);
312 return coord_copy(top_coord);
316 struct montecarlo *
317 montecarlo_state_init(char *arg)
319 struct montecarlo *mc = calloc(1, sizeof(struct montecarlo));
321 mc->last_hint = pass;
323 mc->debug_level = 1;
324 mc->games = MC_GAMES;
325 mc->gamelen = MC_GAMELEN;
326 mc->atari_rate = MC_ATARIRATE;
327 mc->local_rate = MC_LOCALRATE;
328 mc->cut_rate = MC_CUTRATE;
330 if (arg) {
331 char *optspec, *next = arg;
332 while (*next) {
333 optspec = next;
334 next += strcspn(next, ",");
335 if (*next) { *next++ = 0; } else { *next = 0; }
337 char *optname = optspec;
338 char *optval = strchr(optspec, '=');
339 if (optval) *optval++ = 0;
341 if (!strcasecmp(optname, "debug")) {
342 if (optval)
343 mc->debug_level = atoi(optval);
344 else
345 mc->debug_level++;
346 } else if (!strcasecmp(optname, "games") && optval) {
347 mc->games = atoi(optval);
348 } else if (!strcasecmp(optname, "gamelen") && optval) {
349 mc->gamelen = atoi(optval);
350 } else if (!strcasecmp(optname, "pure")) {
351 mc->atari_rate = mc->local_rate = mc->cut_rate = 0;
352 } else if (!strcasecmp(optname, "atarirate") && optval) {
353 mc->atari_rate = atoi(optval);
354 } else if (!strcasecmp(optname, "localrate") && optval) {
355 mc->local_rate = atoi(optval);
356 } else if (!strcasecmp(optname, "cutrate") && optval) {
357 mc->cut_rate = atoi(optval);
358 } else {
359 fprintf(stderr, "MonteCarlo: Invalid engine argument %s or missing value\n", optname);
364 mc->resign_ratio = 0.1; /* Resign when most games are lost. */
365 mc->loss_threshold = mc->games / 10; /* Stop reading if no loss encountered in first n games. */
367 return mc;
371 struct engine *
372 engine_montecarlo_init(char *arg)
374 struct montecarlo *mc = montecarlo_state_init(arg);
375 struct engine *e = calloc(1, sizeof(struct engine));
376 e->name = "MonteCarlo Engine";
377 e->comment = "I'm playing in Monte Carlo. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
378 e->genmove = montecarlo_genmove;
379 e->data = mc;
381 return e;