playout/elo.c

   1 /* Playout player based on probability distribution generated over
   2  * the available moves. */
   3
   4 /* We use the ELO-based (Coulom, 2007) approach, where each board
   5  * feature (matched pattern, self-atari, capture, MC owner?, ...)
   6  * is pre-assigned "playing strength" (gamma).
   7  *
   8  * Then, the problem of choosing a move is basically a team
   9  * competition in ELO terms - each spot is represented by a team
  10  * of features appearing there; the team gamma is product of feature
  11  * gammas. The team gammas make for a probability distribution of
  12  * moves to be played.
  13  *
  14  * We use the general pattern classifier that will find the features
  15  * for us, and external datasets that can be harvested from a set
  16  * of game records (see the HACKING file for details): patterns.spat
  17  * as a dictionary of spatial stone configurations, and patterns.gamma
  18  * with strengths of particular features. */
  19
  20 #include <assert.h>
  21 #include <math.h>
  22 #include <stdio.h>
  23 #include <stdlib.h>
  24
  25 #define DEBUG
  26 #include "board.h"
  27 #include "debug.h"
  28 #include "pattern.h"
  29 #include "patternsp.h"
  30 #include "playout.h"
  31 #include "playout/elo.h"
  32 #include "random.h"
  33 #include "tactics.h"
  34 #include "uct/prior.h"
  35
  36 #define PLDEBUGL(n) DEBUGL_(p->debug_level, n)
  37
  38
  39 /* Note that the context can be shared by multiple threads! */
  40
  41 struct patternset {
  42         pattern_spec ps;
  43         struct pattern_config pc;
  44         struct features_gamma *fg;
  45 };
  46
  47 struct elo_policy {
  48         float selfatari;
  49         struct patternset choose, assess;
  50 };
  51
  52
  53 /* This is the core of the policy - initializes and constructs the
  54  * probability distribution over the move candidates. */
  55
  56 int
  57 elo_get_probdist(struct playout_policy *p, struct patternset *ps, struct board *b, enum stone to_play, struct probdist *pd)
  58 {
  59         //struct elo_policy *pp = p->data;
  60         int moves = 0;
  61
  62         /* First, assign per-point probabilities. */
  63
  64         for (int f = 0; f < b->flen; f++) {
  65                 struct move m = { .coord = b->f[f], .color = to_play };
  66
  67                 /* Skip pass (for now)? */
  68                 if (is_pass(m.coord)) {
  69 skip_move:
  70                         probdist_set(pd, f, 0);
  71                         continue;
  72                 }
  73                 //fprintf(stderr, "<%d> %s\n", f, coord2sstr(m.coord, b));
  74
  75                 /* Skip invalid moves. */
  76                 if (!board_is_valid_move(b, &m))
  77                         goto skip_move;
  78
  79                 /* We shall never fill our own single-point eyes. */
  80                 /* XXX: In some rare situations, this prunes the best move:
  81                  * Bulk-five nakade with eye at 1-1 point. */
  82                 if (board_is_one_point_eye(b, &m.coord, to_play)) {
  83                         goto skip_move;
  84                 }
  85
  86                 moves++;
  87                 /* Each valid move starts with gamma 1. */
  88                 float g = 1.f;
  89
  90                 /* Some easy features: */
  91                 /* XXX: We just disable them for now since we call the
  92                  * pattern matcher; you need the gammas file. */
  93 #if 0
  94                 if (is_bad_selfatari(b, to_play, m.coord))
  95                         g *= pp->selfatari;
  96 #endif
  97
  98                 /* Match pattern features: */
  99                 struct pattern p;
 100                 pattern_match(&ps->pc, ps->ps, &p, b, &m);
 101                 for (int i = 0; i < p.n; i++) {
 102                         /* Multiply together gammas of all pattern features. */
 103                         float gamma = feature_gamma(ps->fg, &p.f[i], NULL);
 104                         //char buf[256] = ""; feature2str(buf, &p.f[i]);
 105                         //fprintf(stderr, "<%d> %s feat %s gamma %f\n", f, coord2sstr(m.coord, b), buf, gamma);
 106                         g *= gamma;
 107                 }
 108
 109                 probdist_set(pd, f, g);
 110                 //fprintf(stderr, "<%d> %s %f (E %f)\n", f, coord2sstr(m.coord, b), probdist_one(pd, f), pd->items[f]);
 111         }
 112
 113         return moves;
 114 }
 115
 116
 117 coord_t
 118 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
 119 {
 120         struct elo_policy *pp = p->data;
 121 #ifdef BOARD_GAMMA
 122         struct probdist *pd = &b->prob[to_play - 1];
 123         /* Make sure ko-prohibited move does not get picked. */
 124         if (!is_pass(b->ko.coord)) {
 125                 assert(b->ko.color == to_play);
 126                 probdist_set(pd, b->ko.coord, 0);
 127         }
 128         /* Contiguity detection. */
 129         if (!is_pass(b->last_move.coord)) {
 130                 foreach_8neighbor(b, b->last_move.coord) {
 131                         probdist_set(pd, c, pd->items[c] * b->gamma->gamma[FEAT_CONTIGUITY][1]);
 132                 } foreach_8neighbor_end;
 133         }
 134 #if 0
 135         /* Compare to the manually created distribution. */
 136         if (pd->total >= PROBDIST_EPSILON) {
 137                 float pdi[b->flen]; memset(pdi, 0, sizeof(pdi));
 138                 struct probdist pdx = { .n = b->flen, .items = pdi, .total = 0 };
 139                 elo_get_probdist(p, &pp->choose, b, to_play, &pdx);
 140                 for (int i = 0; i < pdx.n; i++) {
 141                         if (is_pass(b->f[i])) continue;
 142                         if (fabs(pdx.items[i] - pd->items[b->f[i]]) >= PROBDIST_EPSILON) {
 143                                 printf("[%s %d] manual %f board %f ", coord2sstr(b->f[i], b), b->pat3[b->f[i]], pdx.items[i], pd->items[b->f[i]]);
 144                                 board_gamma_update(b, b->f[i], to_play);
 145                                 printf("plainboard %f\n", pd->items[b->f[i]]);
 146                                 assert(0);
 147                         }
 148                 }
 149         }
 150 #endif
 151         /* Pick a move. */
 152         coord_t c = pd->total >= PROBDIST_EPSILON ? probdist_pick(pd) : pass;
 153         /* Repair the damage. */
 154         if (!is_pass(b->ko.coord))
 155                 board_gamma_update(b, b->ko.coord, to_play);
 156         if (!is_pass(b->last_move.coord)) {
 157                 foreach_8neighbor(b, b->last_move.coord) {
 158                         board_gamma_update(b, c, to_play);
 159                 } foreach_8neighbor_end;
 160         }
 161         return c;
 162
 163 #else
 164         float pdi[b->flen]; memset(pdi, 0, sizeof(pdi));
 165         struct probdist pd = { .n = b->flen, .items = pdi, .total = 0 };
 166         elo_get_probdist(p, &pp->choose, b, to_play, &pd);
 167         if (pd.total < PROBDIST_EPSILON)
 168                 return pass;
 169         int f = probdist_pick(&pd);
 170         return b->f[f];
 171 #endif
 172 }
 173
 174 void
 175 playout_elo_assess(struct playout_policy *p, struct prior_map *map, int games)
 176 {
 177         struct elo_policy *pp = p->data;
 178         float pdi[map->b->flen]; memset(pdi, 0, sizeof(pdi));
 179         struct probdist pd = { .n = map->b->flen, .items = pdi, .total = 0 };
 180
 181         int moves;
 182         moves = elo_get_probdist(p, &pp->assess, map->b, map->to_play, &pd);
 183
 184         /* It is a question how to transform the gamma to won games; we use
 185          * a naive approach currently, but not sure how well it works. */
 186         /* TODO: Try sqrt(p), atan(p)/pi*2. */
 187
 188         for (int f = 0; f < map->b->flen; f++) {
 189                 coord_t c = map->b->f[f];
 190                 if (!map->consider[c])
 191                         continue;
 192                 add_prior_value(map, c, probdist_one(&pd, f) / probdist_total(&pd), games);
 193         }
 194 }
 195
 196 void
 197 playout_elo_done(struct playout_policy *p)
 198 {
 199         struct elo_policy *pp = p->data;
 200         features_gamma_done(pp->choose.fg);
 201         features_gamma_done(pp->assess.fg);
 202 }
 203
 204
 205 struct playout_policy *
 206 playout_elo_init(char *arg, struct board *b)
 207 {
 208         struct playout_policy *p = calloc(1, sizeof(*p));
 209         struct elo_policy *pp = calloc(1, sizeof(*pp));
 210         p->data = pp;
 211         p->choose = playout_elo_choose;
 212         p->assess = playout_elo_assess;
 213         p->done = playout_elo_done;
 214
 215         const char *gammafile = features_gamma_filename;
 216         /* Some defaults based on the table in Remi Coulom's paper. */
 217         pp->selfatari = 0.06;
 218
 219         struct pattern_config pc = DEFAULT_PATTERN_CONFIG;
 220         int xspat = -1;
 221
 222         if (arg) {
 223                 char *optspec, *next = arg;
 224                 while (*next) {
 225                         optspec = next;
 226                         next += strcspn(next, ":");
 227                         if (*next) { *next++ = 0; } else { *next = 0; }
 228
 229                         char *optname = optspec;
 230                         char *optval = strchr(optspec, '=');
 231                         if (optval) *optval++ = 0;
 232
 233                         if (!strcasecmp(optname, "selfatari") && optval) {
 234                                 pp->selfatari = atof(optval);
 235                         } else if (!strcasecmp(optname, "gammafile") && optval) {
 236                                 /* patterns.gamma by default. We use this,
 237                                  * and need also ${gammafile}f (e.g.
 238                                  * patterns.gammaf) for fast (MC) features. */
 239                                 gammafile = strdup(optval);
 240                         } else if (!strcasecmp(optname, "xspat") && optval) {
 241                                 /* xspat==0: don't match spatial features
 242                                  * xspat==1: match *only* spatial features */
 243                                 xspat = atoi(optval);
 244                         } else {
 245                                 fprintf(stderr, "playout-elo: Invalid policy argument %s or missing value\n", optname);
 246                                 exit(1);
 247                         }
 248                 }
 249         }
 250
 251         pc.spat_dict = spatial_dict_init(false);
 252
 253         pp->assess.pc = pc;
 254         pp->assess.fg = features_gamma_init(&pp->assess.pc, gammafile);
 255         memcpy(pp->assess.ps, PATTERN_SPEC_MATCHALL, sizeof(pattern_spec));
 256         for (int i = 0; i < FEAT_MAX; i++)
 257                 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
 258                         pp->assess.ps[i] = 0;
 259
 260         /* In playouts, we need to operate with much smaller set of features
 261          * in order to keep reasonable speed. */
 262         /* TODO: Configurable. */ /* TODO: Tune. */
 263         pp->choose.pc = FAST_PATTERN_CONFIG;
 264         pp->choose.pc.spat_dict = pc.spat_dict;
 265         char cgammafile[256]; strcpy(stpcpy(cgammafile, gammafile), "f");
 266         pp->choose.fg = features_gamma_init(&pp->choose.pc, cgammafile);
 267         memcpy(pp->choose.ps, PATTERN_SPEC_MATCHFAST, sizeof(pattern_spec));
 268         for (int i = 0; i < FEAT_MAX; i++)
 269                 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
 270                         pp->choose.ps[i] = 0;
 271         board_gamma_set(b, pp->choose.fg);
 272
 273         return p;
 274 }