montecarlo/montecarlo.c

   1 #include <assert.h>
   2 #include <stdio.h>
   3 #include <stdlib.h>
   4 #include <string.h>
   5
   6 #include "board.h"
   7 #include "engine.h"
   8 #include "joseki/base.h"
   9 #include "move.h"
  10 #include "playout/elo.h"
  11 #include "playout/moggy.h"
  12 #include "playout/light.h"
  13 #include "montecarlo/internal.h"
  14 #include "montecarlo/montecarlo.h"
  15 #include "playout.h"
  16 #include "timeinfo.h"
  17
  18
  19 /* This is simple monte-carlo engine. It plays MC_GAMES random games from the
  20  * current board and records win/loss ratio for each first move. The move with
  21  * the biggest number of winning games gets played. */
  22 /* Note that while the library is based on New Zealand rules, this engine
  23  * returns moves according to Chinese rules. Thus, it does not return suicide
  24  * moves. It of course respects positional superko too. */
  25
  26 /* Pass me arguments like a=b,c=d,...
  27  * Supported arguments:
  28  * debug[=DEBUG_LEVEL]          1 is the default; more means more debugging prints
  29  * games=MC_GAMES               number of random games to play
  30  * gamelen=MC_GAMELEN           maximal length of played random game
  31  * playout={light,moggy,elo}[:playout_params]
  32  */
  33
  34
  35 #define MC_GAMES        40000
  36 #define MC_GAMELEN      400
  37
  38
  39 /* FIXME: Cutoff rule for simulations. Currently we are so fast that this
  40  * simply does not matter; even 100000 simulations are fast enough to
  41  * play 5 minutes S.D. on 19x19 and anything more sounds too ridiculous
  42  * already. */
  43 /* FIXME: We cannot handle seki. Any good ideas are welcome. A possibility is
  44  * to consider 'pass' among the moves, but this seems tricky. */
  45
  46
  47 void
  48 board_stats_print(struct board *board, struct move_stat *moves, FILE *f)
  49 {
  50         fprintf(f, "\n       ");
  51         int x, y;
  52         char asdf[] = "ABCDEFGHJKLMNOPQRSTUVWXYZ";
  53         for (x = 1; x < board_size(board) - 1; x++)
  54                 fprintf(f, "%c    ", asdf[x - 1]);
  55         fprintf(f, "\n   +-");
  56         for (x = 1; x < board_size(board) - 1; x++)
  57                 fprintf(f, "-----");
  58         fprintf(f, "+\n");
  59         for (y = board_size(board) - 2; y >= 1; y--) {
  60                 fprintf(f, "%2d | ", y);
  61                 for (x = 1; x < board_size(board) - 1; x++)
  62                         if (moves[y * board_size(board) + x].games)
  63                                 fprintf(f, "%0.2f ", (float) moves[y * board_size(board) + x].wins / moves[y * board_size(board) + x].games);
  64                         else
  65                                 fprintf(f, "---- ");
  66                 fprintf(f, "| ");
  67                 for (x = 1; x < board_size(board) - 1; x++)
  68                         fprintf(f, "%4d ", moves[y * board_size(board) + x].games);
  69                 fprintf(f, "|\n");
  70         }
  71         fprintf(f, "   +-");
  72         for (x = 1; x < board_size(board) - 1; x++)
  73                 fprintf(f, "-----");
  74         fprintf(f, "+\n");
  75 }
  76
  77
  78 static coord_t *
  79 montecarlo_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive)
  80 {
  81         struct montecarlo *mc = e->data;
  82
  83         if (ti->dim == TD_WALLTIME) {
  84                 fprintf(stderr, "Warning: TD_WALLTIME time mode not supported, resetting to defaults.\n");
  85                 ti->period = TT_NULL;
  86         }
  87         if (ti->period == TT_NULL) {
  88                 ti->period = TT_MOVE;
  89                 ti->dim = TD_GAMES;
  90                 ti->len.games = MC_GAMES;
  91         }
  92         struct time_stop stop;
  93         time_stop_conditions(ti, b, 20, 40, &stop);
  94
  95         /* resign when the hope for win vanishes */
  96         coord_t top_coord = resign;
  97         float top_ratio = mc->resign_ratio;
  98
  99         /* We use [0] for pass. Normally, this is an inaccessible corner
 100          * of board margin. */
 101         struct move_stat moves[board_size2(b)];
 102         memset(moves, 0, sizeof(moves));
 103
 104         int losses = 0;
 105         int i, superko = 0, good_games = 0;
 106         for (i = 0; i < stop.desired.playouts; i++) {
 107                 assert(!b->superko_violation);
 108
 109                 struct board b2;
 110                 board_copy(&b2, b);
 111
 112                 coord_t coord;
 113                 board_play_random(&b2, color, &coord, NULL, NULL);
 114                 if (!is_pass(coord) && !group_at(&b2, coord)) {
 115                         /* Multi-stone suicide. We play chinese rules,
 116                          * so we can't consider this. (Note that we
 117                          * unfortunately still consider this in playouts.) */
 118                         if (DEBUGL(4)) {
 119                                 fprintf(stderr, "SUICIDE DETECTED at %d,%d:\n", coord_x(coord, b), coord_y(coord, b));
 120                                 board_print(b, stderr);
 121                         }
 122                         continue;
 123                 }
 124
 125                 if (DEBUGL(3))
 126                         fprintf(stderr, "[%d,%d color %d] playing random game\n", coord_x(coord, b), coord_y(coord, b), color);
 127
 128                 struct playout_setup ps = { .gamelen = mc->gamelen };
 129                 int result = play_random_game(&ps, &b2, color, NULL, NULL, mc->playout);
 130
 131                 board_done_noalloc(&b2);
 132
 133                 if (result == 0) {
 134                         /* Superko. We just ignore this playout.
 135                          * And play again. */
 136                         if (unlikely(superko > 2 * stop.desired.playouts)) {
 137                                 /* Uhh. Triple ko, or something? */
 138                                 if (MCDEBUGL(0))
 139                                         fprintf(stderr, "SUPERKO LOOP. I will pass. Did we hit triple ko?\n");
 140                                 goto pass_wins;
 141                         }
 142                         /* This playout didn't count; we should not
 143                          * disadvantage moves that lead to a superko.
 144                          * And it is supposed to be rare. */
 145                         i--, superko++;
 146                         continue;
 147                 }
 148
 149                 if (MCDEBUGL(3))
 150                         fprintf(stderr, "\tresult for other player: %d\n", result);
 151
 152                 int pos = is_pass(coord) ? 0 : coord;
 153
 154                 good_games++;
 155                 moves[pos].games++;
 156
 157                 losses += result > 0;
 158                 moves[pos].wins += 1 - (result > 0);
 159
 160                 if (unlikely(!losses && i == mc->loss_threshold)) {
 161                         /* We played out many games and didn't lose once yet.
 162                          * This game is over. */
 163                         break;
 164                 }
 165         }
 166
 167         if (!good_games) {
 168                 /* No moves to try??? */
 169                 if (MCDEBUGL(0)) {
 170                         fprintf(stderr, "OUT OF MOVES! I will pass. But how did this happen?\n");
 171                         board_print(b, stderr);
 172                 }
 173 pass_wins:
 174                 top_coord = pass; top_ratio = 0.5;
 175                 goto move_found;
 176         }
 177
 178         foreach_point(b) {
 179                 if (b->moves < 3) {
 180                         /* Simple heuristic: avoid opening too low. Do not
 181                          * play on second or first line as first white or
 182                          * first two black moves.*/
 183                         if (coord_x(c, b) < 3 || coord_x(c, b) > board_size(b) - 4
 184                             || coord_y(c, b) < 3 || coord_y(c, b) > board_size(b) - 4)
 185                                 continue;
 186                 }
 187
 188                 float ratio = (float) moves[c].wins / moves[c].games;
 189                 /* Since pass is [0,0], we will pass only when we have nothing
 190                  * better to do. */
 191                 if (ratio >= top_ratio) {
 192                         top_ratio = ratio;
 193                         top_coord = c == 0 ? pass : c;
 194                 }
 195         } foreach_point_end;
 196
 197         if (MCDEBUGL(2)) {
 198                 board_stats_print(b, moves, stderr);
 199         }
 200
 201 move_found:
 202         if (MCDEBUGL(1))
 203                 fprintf(stderr, "*** WINNER is %d,%d with score %1.4f (%d games, %d superko)\n", coord_x(top_coord, b), coord_y(top_coord, b), top_ratio, i, superko);
 204
 205         return coord_copy(top_coord);
 206 }
 207
 208
 209 struct montecarlo *
 210 montecarlo_state_init(char *arg, struct board *b)
 211 {
 212         struct montecarlo *mc = calloc2(1, sizeof(struct montecarlo));
 213
 214         mc->debug_level = 1;
 215         mc->gamelen = MC_GAMELEN;
 216
 217         if (arg) {
 218                 char *optspec, *next = arg;
 219                 while (*next) {
 220                         optspec = next;
 221                         next += strcspn(next, ",");
 222                         if (*next) { *next++ = 0; } else { *next = 0; }
 223
 224                         char *optname = optspec;
 225                         char *optval = strchr(optspec, '=');
 226                         if (optval) *optval++ = 0;
 227
 228                         if (!strcasecmp(optname, "debug")) {
 229                                 if (optval)
 230                                         mc->debug_level = atoi(optval);
 231                                 else
 232                                         mc->debug_level++;
 233                         } else if (!strcasecmp(optname, "gamelen") && optval) {
 234                                 mc->gamelen = atoi(optval);
 235                         } else if (!strcasecmp(optname, "playout") && optval) {
 236                                 char *playoutarg = strchr(optval, ':');
 237                                 if (playoutarg)
 238                                         *playoutarg++ = 0;
 239                                 if (!strcasecmp(optval, "moggy")) {
 240                                         mc->playout = playout_moggy_init(playoutarg, b, joseki_load(b->size));
 241                                 } else if (!strcasecmp(optval, "light")) {
 242                                         mc->playout = playout_light_init(playoutarg, b);
 243                                 } else if (!strcasecmp(optval, "elo")) {
 244                                         mc->playout = playout_elo_init(playoutarg, b);
 245                                 } else {
 246                                         fprintf(stderr, "MonteCarlo: Invalid playout policy %s\n", optval);
 247                                 }
 248                         } else {
 249                                 fprintf(stderr, "MonteCarlo: Invalid engine argument %s or missing value\n", optname);
 250                         }
 251                 }
 252         }
 253
 254         if (!mc->playout)
 255                 mc->playout = playout_light_init(NULL, b);
 256         mc->playout->debug_level = mc->debug_level;
 257
 258         mc->resign_ratio = 0.1; /* Resign when most games are lost. */
 259         mc->loss_threshold = 5000; /* Stop reading if no loss encountered in first 5000 games. */
 260
 261         return mc;
 262 }
 263
 264
 265 struct engine *
 266 engine_montecarlo_init(char *arg, struct board *b)
 267 {
 268         struct montecarlo *mc = montecarlo_state_init(arg, b);
 269         struct engine *e = calloc2(1, sizeof(struct engine));
 270         e->name = "MonteCarlo Engine";
 271         e->comment = "I'm playing in Monte Carlo. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
 272         e->genmove = montecarlo_genmove;
 273         e->data = mc;
 274
 275         return e;
 276 }