Decrement node_sizes when freeing a node.
[pachi.git] / montecarlo / montecarlo.c
blob3348124dcb37e0af6e2d28cdde05ae68589f2e3a
1 #include <assert.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
6 #include "board.h"
7 #include "engine.h"
8 #include "move.h"
9 #include "playout/elo.h"
10 #include "playout/moggy.h"
11 #include "playout/light.h"
12 #include "montecarlo/internal.h"
13 #include "montecarlo/montecarlo.h"
14 #include "playout.h"
17 /* This is simple monte-carlo engine. It plays MC_GAMES random games from the
18 * current board and records win/loss ratio for each first move. The move with
19 * the biggest number of winning games gets played. */
20 /* Note that while the library is based on New Zealand rules, this engine
21 * returns moves according to Chinese rules. Thus, it does not return suicide
22 * moves. It of course respects positional superko too. */
24 /* Pass me arguments like a=b,c=d,...
25 * Supported arguments:
26 * debug[=DEBUG_LEVEL] 1 is the default; more means more debugging prints
27 * games=MC_GAMES number of random games to play
28 * gamelen=MC_GAMELEN maximal length of played random game
29 * playout={light,moggy,elo}[:playout_params]
33 #define MC_GAMES 40000
34 #define MC_GAMELEN 400
37 /* FIXME: Cutoff rule for simulations. Currently we are so fast that this
38 * simply does not matter; even 100000 simulations are fast enough to
39 * play 5 minutes S.D. on 19x19 and anything more sounds too ridiculous
40 * already. */
41 /* FIXME: We cannot handle seki. Any good ideas are welcome. A possibility is
42 * to consider 'pass' among the moves, but this seems tricky. */
45 void
46 board_stats_print(struct board *board, struct move_stat *moves, FILE *f)
48 fprintf(f, "\n ");
49 int x, y;
50 char asdf[] = "ABCDEFGHJKLMNOPQRSTUVWXYZ";
51 for (x = 1; x < board_size(board) - 1; x++)
52 fprintf(f, "%c ", asdf[x - 1]);
53 fprintf(f, "\n +-");
54 for (x = 1; x < board_size(board) - 1; x++)
55 fprintf(f, "-----");
56 fprintf(f, "+\n");
57 for (y = board_size(board) - 2; y >= 1; y--) {
58 fprintf(f, "%2d | ", y);
59 for (x = 1; x < board_size(board) - 1; x++)
60 if (moves[y * board_size(board) + x].games)
61 fprintf(f, "%0.2f ", (float) moves[y * board_size(board) + x].wins / moves[y * board_size(board) + x].games);
62 else
63 fprintf(f, "---- ");
64 fprintf(f, "| ");
65 for (x = 1; x < board_size(board) - 1; x++)
66 fprintf(f, "%4d ", moves[y * board_size(board) + x].games);
67 fprintf(f, "|\n");
69 fprintf(f, " +-");
70 for (x = 1; x < board_size(board) - 1; x++)
71 fprintf(f, "-----");
72 fprintf(f, "+\n");
76 static coord_t *
77 montecarlo_genmove(struct engine *e, struct board *b, enum stone color)
79 struct montecarlo *mc = e->data;
81 /* resign when the hope for win vanishes */
82 coord_t top_coord = resign;
83 float top_ratio = mc->resign_ratio;
85 /* We use [0] for pass. Normally, this is an inaccessible corner
86 * of board margin. */
87 struct move_stat moves[board_size2(b)];
88 memset(moves, 0, sizeof(moves));
90 int losses = 0;
91 int i, superko = 0, good_games = 0;
92 for (i = 0; i < mc->games; i++) {
93 assert(!b->superko_violation);
95 struct board b2;
96 board_copy(&b2, b);
98 coord_t coord;
99 board_play_random(&b2, color, &coord, NULL, NULL);
100 if (!is_pass(coord) && !group_at(&b2, coord)) {
101 /* Multi-stone suicide. We play chinese rules,
102 * so we can't consider this. (Note that we
103 * unfortunately still consider this in playouts.) */
104 if (DEBUGL(4)) {
105 fprintf(stderr, "SUICIDE DETECTED at %d,%d:\n", coord_x(coord, b), coord_y(coord, b));
106 board_print(b, stderr);
108 continue;
111 if (DEBUGL(3))
112 fprintf(stderr, "[%d,%d color %d] playing random game\n", coord_x(coord, b), coord_y(coord, b), color);
114 struct playout_setup ps = { .gamelen = mc->gamelen };
115 int result = play_random_game(&ps, &b2, color, NULL, NULL, mc->playout);
117 board_done_noalloc(&b2);
119 if (result == 0) {
120 /* Superko. We just ignore this playout.
121 * And play again. */
122 if (unlikely(superko > 2 * mc->games)) {
123 /* Uhh. Triple ko, or something? */
124 if (MCDEBUGL(0))
125 fprintf(stderr, "SUPERKO LOOP. I will pass. Did we hit triple ko?\n");
126 goto pass_wins;
128 /* This playout didn't count; we should not
129 * disadvantage moves that lead to a superko.
130 * And it is supposed to be rare. */
131 i--, superko++;
132 continue;
135 if (MCDEBUGL(3))
136 fprintf(stderr, "\tresult for other player: %d\n", result);
138 int pos = is_pass(coord) ? 0 : coord_raw(coord);
140 good_games++;
141 moves[pos].games++;
143 losses += result > 0;
144 moves[pos].wins += 1 - (result > 0);
146 if (unlikely(!losses && i == mc->loss_threshold)) {
147 /* We played out many games and didn't lose once yet.
148 * This game is over. */
149 break;
153 if (!good_games) {
154 /* No moves to try??? */
155 if (MCDEBUGL(0)) {
156 fprintf(stderr, "OUT OF MOVES! I will pass. But how did this happen?\n");
157 board_print(b, stderr);
159 pass_wins:
160 top_coord = pass; top_ratio = 0.5;
161 goto move_found;
164 foreach_point(b) {
165 if (b->moves < 3) {
166 /* Simple heuristic: avoid opening too low. Do not
167 * play on second or first line as first white or
168 * first two black moves.*/
169 if (coord_x(c, b) < 3 || coord_x(c, b) > board_size(b) - 4
170 || coord_y(c, b) < 3 || coord_y(c, b) > board_size(b) - 4)
171 continue;
174 float ratio = (float) moves[coord_raw(c)].wins / moves[coord_raw(c)].games;
175 /* Since pass is [0,0], we will pass only when we have nothing
176 * better to do. */
177 if (ratio >= top_ratio) {
178 top_ratio = ratio;
179 top_coord = coord_raw(c) == 0 ? pass : c;
181 } foreach_point_end;
183 if (MCDEBUGL(2)) {
184 board_stats_print(b, moves, stderr);
187 move_found:
188 if (MCDEBUGL(1))
189 fprintf(stderr, "*** WINNER is %d,%d with score %1.4f (%d games, %d superko)\n", coord_x(top_coord, b), coord_y(top_coord, b), top_ratio, i, superko);
191 return coord_copy(top_coord);
195 struct montecarlo *
196 montecarlo_state_init(char *arg)
198 struct montecarlo *mc = calloc(1, sizeof(struct montecarlo));
200 mc->debug_level = 1;
201 mc->games = MC_GAMES;
202 mc->gamelen = MC_GAMELEN;
204 if (arg) {
205 char *optspec, *next = arg;
206 while (*next) {
207 optspec = next;
208 next += strcspn(next, ",");
209 if (*next) { *next++ = 0; } else { *next = 0; }
211 char *optname = optspec;
212 char *optval = strchr(optspec, '=');
213 if (optval) *optval++ = 0;
215 if (!strcasecmp(optname, "debug")) {
216 if (optval)
217 mc->debug_level = atoi(optval);
218 else
219 mc->debug_level++;
220 } else if (!strcasecmp(optname, "games") && optval) {
221 mc->games = atoi(optval);
222 } else if (!strcasecmp(optname, "gamelen") && optval) {
223 mc->gamelen = atoi(optval);
224 } else if (!strcasecmp(optname, "playout") && optval) {
225 char *playoutarg = strchr(optval, ':');
226 if (playoutarg)
227 *playoutarg++ = 0;
228 if (!strcasecmp(optval, "moggy")) {
229 mc->playout = playout_moggy_init(playoutarg);
230 } else if (!strcasecmp(optval, "light")) {
231 mc->playout = playout_light_init(playoutarg);
232 } else if (!strcasecmp(optval, "elo")) {
233 mc->playout = playout_elo_init(playoutarg);
234 } else {
235 fprintf(stderr, "MonteCarlo: Invalid playout policy %s\n", optval);
237 } else {
238 fprintf(stderr, "MonteCarlo: Invalid engine argument %s or missing value\n", optname);
243 if (!mc->playout)
244 mc->playout = playout_light_init(NULL);
245 mc->playout->debug_level = mc->debug_level;
247 mc->resign_ratio = 0.1; /* Resign when most games are lost. */
248 mc->loss_threshold = mc->games / 10; /* Stop reading if no loss encountered in first n games. */
250 return mc;
254 struct engine *
255 engine_montecarlo_init(char *arg, struct board *b)
257 struct montecarlo *mc = montecarlo_state_init(arg);
258 struct engine *e = calloc(1, sizeof(struct engine));
259 e->name = "MonteCarlo Engine";
260 e->comment = "I'm playing in Monte Carlo. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
261 e->genmove = montecarlo_genmove;
262 e->data = mc;
264 return e;