Allow double for all floating point values in large configurations.
[pachi.git] / montecarlo / montecarlo.c
blob50df996e0a45756c4579ab4588a967c734c0083c
1 #include <assert.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
6 #include "board.h"
7 #include "engine.h"
8 #include "joseki/base.h"
9 #include "move.h"
10 #include "playout/elo.h"
11 #include "playout/moggy.h"
12 #include "playout/light.h"
13 #include "montecarlo/internal.h"
14 #include "montecarlo/montecarlo.h"
15 #include "playout.h"
16 #include "timeinfo.h"
19 /* This is simple monte-carlo engine. It plays MC_GAMES random games from the
20 * current board and records win/loss ratio for each first move. The move with
21 * the biggest number of winning games gets played. */
22 /* Note that while the library is based on New Zealand rules, this engine
23 * returns moves according to Chinese rules. Thus, it does not return suicide
24 * moves. It of course respects positional superko too. */
26 /* Pass me arguments like a=b,c=d,...
27 * Supported arguments:
28 * debug[=DEBUG_LEVEL] 1 is the default; more means more debugging prints
29 * games=MC_GAMES number of random games to play
30 * gamelen=MC_GAMELEN maximal length of played random game
31 * playout={light,moggy,elo}[:playout_params]
35 #define MC_GAMES 40000
36 #define MC_GAMELEN 400
39 /* FIXME: Cutoff rule for simulations. Currently we are so fast that this
40 * simply does not matter; even 100000 simulations are fast enough to
41 * play 5 minutes S.D. on 19x19 and anything more sounds too ridiculous
42 * already. */
43 /* FIXME: We cannot handle seki. Any good ideas are welcome. A possibility is
44 * to consider 'pass' among the moves, but this seems tricky. */
47 void
48 board_stats_print(struct board *board, struct move_stat *moves, FILE *f)
50 fprintf(f, "\n ");
51 int x, y;
52 char asdf[] = "ABCDEFGHJKLMNOPQRSTUVWXYZ";
53 for (x = 1; x < board_size(board) - 1; x++)
54 fprintf(f, "%c ", asdf[x - 1]);
55 fprintf(f, "\n +-");
56 for (x = 1; x < board_size(board) - 1; x++)
57 fprintf(f, "-----");
58 fprintf(f, "+\n");
59 for (y = board_size(board) - 2; y >= 1; y--) {
60 fprintf(f, "%2d | ", y);
61 for (x = 1; x < board_size(board) - 1; x++)
62 if (moves[y * board_size(board) + x].games)
63 fprintf(f, "%0.2f ", (floating_t) moves[y * board_size(board) + x].wins / moves[y * board_size(board) + x].games);
64 else
65 fprintf(f, "---- ");
66 fprintf(f, "| ");
67 for (x = 1; x < board_size(board) - 1; x++)
68 fprintf(f, "%4d ", moves[y * board_size(board) + x].games);
69 fprintf(f, "|\n");
71 fprintf(f, " +-");
72 for (x = 1; x < board_size(board) - 1; x++)
73 fprintf(f, "-----");
74 fprintf(f, "+\n");
78 static coord_t *
79 montecarlo_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive)
81 struct montecarlo *mc = e->data;
83 if (ti->dim == TD_WALLTIME) {
84 fprintf(stderr, "Warning: TD_WALLTIME time mode not supported, resetting to defaults.\n");
85 ti->period = TT_NULL;
87 if (ti->period == TT_NULL) {
88 ti->period = TT_MOVE;
89 ti->dim = TD_GAMES;
90 ti->len.games = MC_GAMES;
92 struct time_stop stop;
93 time_stop_conditions(ti, b, 20, 40, &stop);
95 /* resign when the hope for win vanishes */
96 coord_t top_coord = resign;
97 floating_t top_ratio = mc->resign_ratio;
99 /* We use [0] for pass. Normally, this is an inaccessible corner
100 * of board margin. */
101 struct move_stat moves[board_size2(b)];
102 memset(moves, 0, sizeof(moves));
104 int losses = 0;
105 int i, superko = 0, good_games = 0;
106 for (i = 0; i < stop.desired.playouts; i++) {
107 assert(!b->superko_violation);
109 struct board b2;
110 board_copy(&b2, b);
112 coord_t coord;
113 board_play_random(&b2, color, &coord, NULL, NULL);
114 if (!is_pass(coord) && !group_at(&b2, coord)) {
115 /* Multi-stone suicide. We play chinese rules,
116 * so we can't consider this. (Note that we
117 * unfortunately still consider this in playouts.) */
118 if (DEBUGL(4)) {
119 fprintf(stderr, "SUICIDE DETECTED at %d,%d:\n", coord_x(coord, b), coord_y(coord, b));
120 board_print(b, stderr);
122 continue;
125 if (DEBUGL(3))
126 fprintf(stderr, "[%d,%d color %d] playing random game\n", coord_x(coord, b), coord_y(coord, b), color);
128 struct playout_setup ps = { .gamelen = mc->gamelen };
129 int result = play_random_game(&ps, &b2, color, NULL, NULL, mc->playout);
131 board_done_noalloc(&b2);
133 if (result == 0) {
134 /* Superko. We just ignore this playout.
135 * And play again. */
136 if (unlikely(superko > 2 * stop.desired.playouts)) {
137 /* Uhh. Triple ko, or something? */
138 if (MCDEBUGL(0))
139 fprintf(stderr, "SUPERKO LOOP. I will pass. Did we hit triple ko?\n");
140 goto pass_wins;
142 /* This playout didn't count; we should not
143 * disadvantage moves that lead to a superko.
144 * And it is supposed to be rare. */
145 i--, superko++;
146 continue;
149 if (MCDEBUGL(3))
150 fprintf(stderr, "\tresult for other player: %d\n", result);
152 int pos = is_pass(coord) ? 0 : coord;
154 good_games++;
155 moves[pos].games++;
157 losses += result > 0;
158 moves[pos].wins += 1 - (result > 0);
160 if (unlikely(!losses && i == mc->loss_threshold)) {
161 /* We played out many games and didn't lose once yet.
162 * This game is over. */
163 break;
167 if (!good_games) {
168 /* No moves to try??? */
169 if (MCDEBUGL(0)) {
170 fprintf(stderr, "OUT OF MOVES! I will pass. But how did this happen?\n");
171 board_print(b, stderr);
173 pass_wins:
174 top_coord = pass; top_ratio = 0.5;
175 goto move_found;
178 foreach_point(b) {
179 if (b->moves < 3) {
180 /* Simple heuristic: avoid opening too low. Do not
181 * play on second or first line as first white or
182 * first two black moves.*/
183 if (coord_x(c, b) < 3 || coord_x(c, b) > board_size(b) - 4
184 || coord_y(c, b) < 3 || coord_y(c, b) > board_size(b) - 4)
185 continue;
188 floating_t ratio = (floating_t) moves[c].wins / moves[c].games;
189 /* Since pass is [0,0], we will pass only when we have nothing
190 * better to do. */
191 if (ratio >= top_ratio) {
192 top_ratio = ratio;
193 top_coord = c == 0 ? pass : c;
195 } foreach_point_end;
197 if (MCDEBUGL(2)) {
198 board_stats_print(b, moves, stderr);
201 move_found:
202 if (MCDEBUGL(1))
203 fprintf(stderr, "*** WINNER is %d,%d with score %1.4f (%d games, %d superko)\n", coord_x(top_coord, b), coord_y(top_coord, b), top_ratio, i, superko);
205 return coord_copy(top_coord);
209 struct montecarlo *
210 montecarlo_state_init(char *arg, struct board *b)
212 struct montecarlo *mc = calloc2(1, sizeof(struct montecarlo));
214 mc->debug_level = 1;
215 mc->gamelen = MC_GAMELEN;
217 if (arg) {
218 char *optspec, *next = arg;
219 while (*next) {
220 optspec = next;
221 next += strcspn(next, ",");
222 if (*next) { *next++ = 0; } else { *next = 0; }
224 char *optname = optspec;
225 char *optval = strchr(optspec, '=');
226 if (optval) *optval++ = 0;
228 if (!strcasecmp(optname, "debug")) {
229 if (optval)
230 mc->debug_level = atoi(optval);
231 else
232 mc->debug_level++;
233 } else if (!strcasecmp(optname, "gamelen") && optval) {
234 mc->gamelen = atoi(optval);
235 } else if (!strcasecmp(optname, "playout") && optval) {
236 char *playoutarg = strchr(optval, ':');
237 if (playoutarg)
238 *playoutarg++ = 0;
239 if (!strcasecmp(optval, "moggy")) {
240 mc->playout = playout_moggy_init(playoutarg, b, joseki_load(b->size));
241 } else if (!strcasecmp(optval, "light")) {
242 mc->playout = playout_light_init(playoutarg, b);
243 } else if (!strcasecmp(optval, "elo")) {
244 mc->playout = playout_elo_init(playoutarg, b);
245 } else {
246 fprintf(stderr, "MonteCarlo: Invalid playout policy %s\n", optval);
248 } else {
249 fprintf(stderr, "MonteCarlo: Invalid engine argument %s or missing value\n", optname);
254 if (!mc->playout)
255 mc->playout = playout_light_init(NULL, b);
256 mc->playout->debug_level = mc->debug_level;
258 mc->resign_ratio = 0.1; /* Resign when most games are lost. */
259 mc->loss_threshold = 5000; /* Stop reading if no loss encountered in first 5000 games. */
261 return mc;
265 struct engine *
266 engine_montecarlo_init(char *arg, struct board *b)
268 struct montecarlo *mc = montecarlo_state_init(arg, b);
269 struct engine *e = calloc2(1, sizeof(struct engine));
270 e->name = "MonteCarlo Engine";
271 e->comment = "I'm playing in Monte Carlo. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
272 e->genmove = montecarlo_genmove;
273 e->data = mc;
275 return e;