montecarlo/montecarlo.c

   1 #include <assert.h>
   2 #include <stdio.h>
   3 #include <stdlib.h>
   4 #include <string.h>
   5
   6 #include "board.h"
   7 #include "engine.h"
   8 #include "move.h"
   9 #include "playout/moggy.h"
  10 #include "playout/light.h"
  11 #include "montecarlo/internal.h"
  12 #include "montecarlo/montecarlo.h"
  13 #include "playout.h"
  14
  15
/* This is a simple Monte Carlo engine. It plays MC_GAMES random games from the
 * current board and records the win/loss ratio for each first move. The move
 * with the highest win ratio gets played. */
  19 /* Note that while the library is based on New Zealand rules, this engine
  20  * returns moves according to Chinese rules. Thus, it does not return suicide
  21  * moves. It of course respects positional superko too. */
  22
  23 /* Pass me arguments like a=b,c=d,...
  24  * Supported arguments:
  25  * debug[=DEBUG_LEVEL]          1 is the default; more means more debugging prints
  26  * games=MC_GAMES               number of random games to play
  27  * gamelen=MC_GAMELEN           maximal length of played random game
  28  * playout={light,moggy}[:playout_params]
  29  */
  30
  31
  32 #define MC_GAMES        40000
  33 #define MC_GAMELEN      400
  34
  35
  36 /* FIXME: Cutoff rule for simulations. Currently we are so fast that this
  37  * simply does not matter; even 100000 simulations are fast enough to
  38  * play 5 minutes S.D. on 19x19 and anything more sounds too ridiculous
  39  * already. */
  40 /* FIXME: We cannot handle seki. Any good ideas are welcome. A possibility is
  41  * to consider 'pass' among the moves, but this seems tricky. */
  42
  43
  44 void
  45 board_stats_print(struct board *board, struct move_stat *moves, FILE *f)
  46 {
  47         fprintf(f, "\n       ");
  48         int x, y;
  49         char asdf[] = "ABCDEFGHJKLMNOPQRSTUVWXYZ";
  50         for (x = 1; x < board_size(board) - 1; x++)
  51                 fprintf(f, "%c    ", asdf[x - 1]);
  52         fprintf(f, "\n   +-");
  53         for (x = 1; x < board_size(board) - 1; x++)
  54                 fprintf(f, "-----");
  55         fprintf(f, "+\n");
  56         for (y = board_size(board) - 2; y >= 1; y--) {
  57                 fprintf(f, "%2d | ", y);
  58                 for (x = 1; x < board_size(board) - 1; x++)
  59                         if (moves[y * board_size(board) + x].games)
  60                                 fprintf(f, "%0.2f ", (float) moves[y * board_size(board) + x].wins / moves[y * board_size(board) + x].games);
  61                         else
  62                                 fprintf(f, "---- ");
  63                 fprintf(f, "| ");
  64                 for (x = 1; x < board_size(board) - 1; x++)
  65                         fprintf(f, "%4d ", moves[y * board_size(board) + x].games);
  66                 fprintf(f, "|\n");
  67         }
  68         fprintf(f, "   +-");
  69         for (x = 1; x < board_size(board) - 1; x++)
  70                 fprintf(f, "-----");
  71         fprintf(f, "+\n");
  72 }
  73
  74
  75 static coord_t *
  76 montecarlo_genmove(struct engine *e, struct board *b, enum stone color)
  77 {
  78         struct montecarlo *mc = e->data;
  79
  80         /* resign when the hope for win vanishes */
  81         coord_t top_coord = resign;
  82         float top_ratio = mc->resign_ratio;
  83
  84         /* We use [0] for pass. Normally, this is an inaccessible corner
  85          * of board margin. */
  86         struct move_stat moves[board_size2(b)];
  87         memset(moves, 0, sizeof(moves));
  88
  89         int losses = 0;
  90         int i, superko = 0, good_games = 0;
  91         for (i = 0; i < mc->games; i++) {
  92                 assert(!b->superko_violation);
  93
  94                 struct board b2;
  95                 board_copy(&b2, b);
  96
  97                 coord_t coord;
  98                 board_play_random(&b2, color, &coord, NULL, NULL);
  99                 if (!is_pass(coord) && !group_at(&b2, coord)) {
 100                         /* Multi-stone suicide. We play chinese rules,
 101                          * so we can't consider this. (Note that we
 102                          * unfortunately still consider this in playouts.) */
 103                         if (DEBUGL(4)) {
 104                                 fprintf(stderr, "SUICIDE DETECTED at %d,%d:\n", coord_x(coord, b), coord_y(coord, b));
 105                                 board_print(b, stderr);
 106                         }
 107                         continue;
 108                 }
 109
 110                 if (DEBUGL(3))
 111                         fprintf(stderr, "[%d,%d color %d] playing random game\n", coord_x(coord, b), coord_y(coord, b), color);
 112
 113                 int result = play_random_game(&b2, color, mc->gamelen, NULL, mc->playout);
 114
 115                 board_done_noalloc(&b2);
 116
 117                 if (result < 0) {
 118                         /* Superko. We just ignore this playout.
 119                          * And play again. */
 120                         if (unlikely(superko > 2 * mc->games)) {
 121                                 /* Uhh. Triple ko, or something? */
 122                                 if (MCDEBUGL(0))
 123                                         fprintf(stderr, "SUPERKO LOOP. I will pass. Did we hit triple ko?\n");
 124                                 goto pass_wins;
 125                         }
 126                         /* This playout didn't count; we should not
 127                          * disadvantage moves that lead to a superko.
 128                          * And it is supposed to be rare. */
 129                         i--, superko++;
 130                         continue;
 131                 }
 132
 133                 if (MCDEBUGL(3))
 134                         fprintf(stderr, "\tresult for other player: %d\n", result);
 135
 136                 int pos = is_pass(coord) ? 0 : coord_raw(coord);
 137
 138                 good_games++;
 139                 moves[pos].games++;
 140
 141                 losses += result;
 142                 moves[pos].wins += 1 - result;
 143
 144                 if (unlikely(!losses && i == mc->loss_threshold)) {
 145                         /* We played out many games and didn't lose once yet.
 146                          * This game is over. */
 147                         break;
 148                 }
 149         }
 150
 151         if (!good_games) {
 152                 /* No moves to try??? */
 153                 if (MCDEBUGL(0)) {
 154                         fprintf(stderr, "OUT OF MOVES! I will pass. But how did this happen?\n");
 155                         board_print(b, stderr);
 156                 }
 157 pass_wins:
 158                 top_coord = pass; top_ratio = 0.5;
 159                 goto move_found;
 160         }
 161
 162         foreach_point(b) {
 163                 if (b->moves < 3) {
 164                         /* Simple heuristic: avoid opening too low. Do not
 165                          * play on second or first line as first white or
 166                          * first two black moves.*/
 167                         if (coord_x(c, b) < 3 || coord_x(c, b) > board_size(b) - 4
 168                             || coord_y(c, b) < 3 || coord_y(c, b) > board_size(b) - 4)
 169                                 continue;
 170                 }
 171
 172                 float ratio = (float) moves[coord_raw(c)].wins / moves[coord_raw(c)].games;
 173                 /* Since pass is [0,0], we will pass only when we have nothing
 174                  * better to do. */
 175                 if (ratio >= top_ratio) {
 176                         top_ratio = ratio;
 177                         top_coord = coord_raw(c) == 0 ? pass : c;
 178                 }
 179         } foreach_point_end;
 180
 181         if (MCDEBUGL(2)) {
 182                 board_stats_print(b, moves, stderr);
 183         }
 184
 185 move_found:
 186         if (MCDEBUGL(1))
 187                 fprintf(stderr, "*** WINNER is %d,%d with score %1.4f (%d games, %d superko)\n", coord_x(top_coord, b), coord_y(top_coord, b), top_ratio, i, superko);
 188
 189         return coord_copy(top_coord);
 190 }
 191
 192
 193 struct montecarlo *
 194 montecarlo_state_init(char *arg)
 195 {
 196         struct montecarlo *mc = calloc(1, sizeof(struct montecarlo));
 197
 198         mc->debug_level = 1;
 199         mc->games = MC_GAMES;
 200         mc->gamelen = MC_GAMELEN;
 201
 202         if (arg) {
 203                 char *optspec, *next = arg;
 204                 while (*next) {
 205                         optspec = next;
 206                         next += strcspn(next, ",");
 207                         if (*next) { *next++ = 0; } else { *next = 0; }
 208
 209                         char *optname = optspec;
 210                         char *optval = strchr(optspec, '=');
 211                         if (optval) *optval++ = 0;
 212
 213                         if (!strcasecmp(optname, "debug")) {
 214                                 if (optval)
 215                                         mc->debug_level = atoi(optval);
 216                                 else
 217                                         mc->debug_level++;
 218                         } else if (!strcasecmp(optname, "games") && optval) {
 219                                 mc->games = atoi(optval);
 220                         } else if (!strcasecmp(optname, "gamelen") && optval) {
 221                                 mc->gamelen = atoi(optval);
 222                         } else if (!strcasecmp(optname, "playout") && optval) {
 223                                 char *playoutarg = strchr(optval, ':');
 224                                 if (playoutarg)
 225                                         *playoutarg++ = 0;
 226                                 if (!strcasecmp(optval, "moggy")) {
 227                                         mc->playout = playout_moggy_init(playoutarg);
 228                                 } else if (!strcasecmp(optval, "light")) {
 229                                         mc->playout = playout_light_init(playoutarg);
 230                                 } else {
 231                                         fprintf(stderr, "MonteCarlo: Invalid playout policy %s\n", optval);
 232                                 }
 233                         } else {
 234                                 fprintf(stderr, "MonteCarlo: Invalid engine argument %s or missing value\n", optname);
 235                         }
 236                 }
 237         }
 238
 239         if (!mc->playout)
 240                 mc->playout = playout_light_init(NULL);
 241         mc->playout->debug_level = mc->debug_level;
 242
 243         mc->resign_ratio = 0.1; /* Resign when most games are lost. */
 244         mc->loss_threshold = mc->games / 10; /* Stop reading if no loss encountered in first n games. */
 245
 246         return mc;
 247 }
 248
 249
 250 struct engine *
 251 engine_montecarlo_init(char *arg)
 252 {
 253         struct montecarlo *mc = montecarlo_state_init(arg);
 254         struct engine *e = calloc(1, sizeof(struct engine));
 255         e->name = "MonteCarlo Engine";
 256         e->comment = "I'm playing in Monte Carlo. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
 257         e->genmove = montecarlo_genmove;
 258         e->data = mc;
 259
 260         return e;
 261 }