Montecarlo: #include <assert.h>
[pachi.git] / montecarlo / montecarlo.c
blobe33f1551011eb7bdebeb83966bbc32feb9ae86c5
1 #include <assert.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
6 #include "board.h"
7 #include "engine.h"
8 #include "move.h"
9 #include "montecarlo/hint.h"
10 #include "montecarlo/internal.h"
11 #include "montecarlo/montecarlo.h"
12 #include "playout.h"
15 /* This is simple monte-carlo engine. It plays MC_GAMES random games from the
16 * current board and records win/loss ratio for each first move. The move with
17 * the biggest number of winning games gets played. */
18 /* Note that while the library is based on New Zealand rules, this engine
19 * returns moves according to Chinese rules. Thus, it does not return suicide
20 * moves. It of course respects positional superko too. */
22 /* Pass me arguments like a=b,c=d,...
23 * Supported arguments:
24 * debug[=DEBUG_LEVEL] 1 is the default; more means more debugging prints
25 * games=MC_GAMES number of random games to play
26 * gamelen=MC_GAMELEN maximal length of played random game
28 * The following arguments tune domain-specific heuristics. They tend to carry
29 * very high performance penalty.
30 * pure turns all the heuristics off; you can then turn
31 * them on selectively
32 * capture_rate=MC_CAPTURERATE how many of 100 moves should be non-random but
33 * fix local atari, if there is any
34 * atari_rate=MC_ATARIRATE how many of 100 moves should be non-random but
35 * make an atari, if there is any
36 * local_rate=MC_LOCALRATE how many of 100 moves should be contact plays
37 * (tsuke or diagonal)
38 * cut_rate=MC_CUTRATE how many of 100 moves should fix local cuts,
39 * if there are any */
42 #define MC_GAMES 40000
43 #define MC_GAMELEN 400
44 #define MC_CAPTURERATE 50
45 #define MC_ATARIRATE 50
46 #define MC_CUTRATE 40
47 #define MC_LOCALRATE 30
50 /* FIXME: Cutoff rule for simulations. Currently we are so fast that this
51 * simply does not matter; even 100000 simulations are fast enough to
52 * play 5 minutes S.D. on 19x19 and anything more sounds too ridiculous
53 * already. */
54 /* FIXME: We cannot handle seki. Any good ideas are welcome. A possibility is
55 * to consider 'pass' among the moves, but this seems tricky. */
58 void
59 board_stats_print(struct board *board, struct move_stat *moves, FILE *f)
61 fprintf(f, "\n ");
62 int x, y;
63 char asdf[] = "ABCDEFGHJKLMNOPQRSTUVWXYZ";
64 for (x = 1; x < board->size - 1; x++)
65 fprintf(f, "%c ", asdf[x - 1]);
66 fprintf(f, "\n +-");
67 for (x = 1; x < board->size - 1; x++)
68 fprintf(f, "-----");
69 fprintf(f, "+\n");
70 for (y = board->size - 2; y >= 1; y--) {
71 fprintf(f, "%2d | ", y);
72 for (x = 1; x < board->size - 1; x++)
73 if (moves[y * board->size + x].games)
74 fprintf(f, "%0.2f ", (float) moves[y * board->size + x].wins / moves[y * board->size + x].games);
75 else
76 fprintf(f, "---- ");
77 fprintf(f, "| ");
78 for (x = 1; x < board->size - 1; x++)
79 fprintf(f, "%4d ", moves[y * board->size + x].games);
80 fprintf(f, "|\n");
82 fprintf(f, " +-");
83 for (x = 1; x < board->size - 1; x++)
84 fprintf(f, "-----");
85 fprintf(f, "+\n");
89 static coord_t *
90 montecarlo_genmove(struct engine *e, struct board *b, enum stone color)
92 struct montecarlo *mc = e->data;
93 struct move m;
94 m.color = color;
96 /* resign when the hope for win vanishes */
97 coord_t top_coord = resign;
98 float top_ratio = mc->resign_ratio;
100 /* We use [0] for pass. Normally, this is an inaccessible corner
101 * of board margin. */
102 struct move_stat moves[b->size2];
103 memset(moves, 0, sizeof(moves));
105 int losses = 0;
106 int i, superko = 0, good_games = 0;
107 for (i = 0; i < mc->games; i++) {
108 assert(!b->superko_violation);
110 struct board b2;
111 board_copy(&b2, b);
112 int result = play_random_game(&b2, &m, mc->gamelen, domain_hint, mc);
113 board_done_noalloc(&b2);
115 if (result == -3) {
116 /* Multi-stone suicide. We play chinese rules,
117 * so we can't consider this. (Note that we
118 * unfortunately still consider this in playouts.) */
119 continue;
122 if (result == -2) {
123 /* Superko. We just ignore this playout.
124 * And play again. */
125 if (unlikely(superko > 2 * mc->games)) {
126 /* Uhh. Triple ko, or something? */
127 if (MCDEBUGL(0))
128 fprintf(stderr, "SUPERKO LOOP. I will pass. Did we hit triple ko?\n");
129 goto pass_wins;
131 /* This playout didn't count; we should not
132 * disadvantage moves that lead to a superko.
133 * And it is supposed to be rare. */
134 i--, superko++;
135 continue;
138 if (MCDEBUGL(3))
139 fprintf(stderr, "\tresult %d\n", result);
141 int pos = is_pass(m.coord) ? 0 : coord_raw(m.coord);
143 good_games++;
144 moves[pos].games++;
146 losses += 1 - result;
147 moves[pos].wins += result;
149 if (unlikely(!losses && i == mc->loss_threshold)) {
150 /* We played out many games and didn't lose once yet.
151 * This game is over. */
152 break;
156 if (!good_games) {
157 /* No moves to try??? */
158 if (MCDEBUGL(0)) {
159 fprintf(stderr, "OUT OF MOVES! I will pass. But how did this happen?\n");
160 board_print(b, stderr);
162 pass_wins:
163 top_coord = pass; top_ratio = 0.5;
164 goto move_found;
167 foreach_point(b) {
168 if (b->moves < 3) {
169 /* Simple heuristic: avoid opening too low. Do not
170 * play on second or first line as first white or
171 * first two black moves.*/
172 if (coord_x(c, b) < 3 || coord_x(c, b) > b->size - 4
173 || coord_y(c, b) < 3 || coord_y(c, b) > b->size - 4)
174 continue;
177 float ratio = (float) moves[coord_raw(c)].wins / moves[coord_raw(c)].games;
178 /* Since pass is [0,0], we will pass only when we have nothing
179 * better to do. */
180 if (ratio >= top_ratio) {
181 top_ratio = ratio;
182 top_coord = coord_raw(c) == 0 ? pass : c;
184 } foreach_point_end;
186 if (MCDEBUGL(2)) {
187 board_stats_print(b, moves, stderr);
190 move_found:
191 if (MCDEBUGL(1))
192 fprintf(stderr, "*** WINNER is %d,%d with score %1.4f (%d games, %d superko)\n", coord_x(top_coord, b), coord_y(top_coord, b), top_ratio, i, superko);
194 return coord_copy(top_coord);
198 struct montecarlo *
199 montecarlo_state_init(char *arg)
201 struct montecarlo *mc = calloc(1, sizeof(struct montecarlo));
203 mc->last_hint = pass;
205 mc->debug_level = 1;
206 mc->games = MC_GAMES;
207 mc->gamelen = MC_GAMELEN;
208 mc->capture_rate = MC_CAPTURERATE;
209 mc->atari_rate = MC_ATARIRATE;
210 mc->local_rate = MC_LOCALRATE;
211 mc->cut_rate = MC_CUTRATE;
213 if (arg) {
214 char *optspec, *next = arg;
215 while (*next) {
216 optspec = next;
217 next += strcspn(next, ",");
218 if (*next) { *next++ = 0; } else { *next = 0; }
220 char *optname = optspec;
221 char *optval = strchr(optspec, '=');
222 if (optval) *optval++ = 0;
224 if (!strcasecmp(optname, "debug")) {
225 if (optval)
226 mc->debug_level = atoi(optval);
227 else
228 mc->debug_level++;
229 } else if (!strcasecmp(optname, "games") && optval) {
230 mc->games = atoi(optval);
231 } else if (!strcasecmp(optname, "gamelen") && optval) {
232 mc->gamelen = atoi(optval);
233 } else if (!strcasecmp(optname, "pure")) {
234 mc->capture_rate = mc->atari_rate = mc->local_rate = mc->cut_rate = 0;
235 } else if (!strcasecmp(optname, "capturerate") && optval) {
236 mc->capture_rate = atoi(optval);
237 } else if (!strcasecmp(optname, "atarirate") && optval) {
238 mc->atari_rate = atoi(optval);
239 } else if (!strcasecmp(optname, "localrate") && optval) {
240 mc->local_rate = atoi(optval);
241 } else if (!strcasecmp(optname, "cutrate") && optval) {
242 mc->cut_rate = atoi(optval);
243 } else {
244 fprintf(stderr, "MonteCarlo: Invalid engine argument %s or missing value\n", optname);
249 mc->resign_ratio = 0.1; /* Resign when most games are lost. */
250 mc->loss_threshold = mc->games / 10; /* Stop reading if no loss encountered in first n games. */
252 return mc;
256 struct engine *
257 engine_montecarlo_init(char *arg)
259 struct montecarlo *mc = montecarlo_state_init(arg);
260 struct engine *e = calloc(1, sizeof(struct engine));
261 e->name = "MonteCarlo Engine";
262 e->comment = "I'm playing in Monte Carlo. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
263 e->genmove = montecarlo_genmove;
264 e->data = mc;
266 return e;