playout/elo.c

   1 /* Playout player based on probability distribution generated over
   2  * the available moves. */
   3
   4 /* We use the ELO-based (Coulom, 2007) approach, where each board
   5  * feature (matched pattern, self-atari, capture, MC owner?, ...)
   6  * is pre-assigned "playing strength" (gamma).
   7  *
   8  * Then, the problem of choosing a move is basically a team
   9  * competition in ELO terms - each spot is represented by a team
  10  * of features appearing there; the team gamma is product of feature
  11  * gammas. The team gammas make for a probability distribution of
  12  * moves to be played.
  13  *
  14  * We use the general pattern classifier that will find the features
  15  * for us, and external datasets that can be harvested from a set
  16  * of game records (see the HACKING file for details): patterns.spat
  17  * as a dictionary of spatial stone configurations, and patterns.gamma
  18  * with strengths of particular features. */
  19
  20 #include <assert.h>
  21 #include <math.h>
  22 #include <stdio.h>
  23 #include <stdlib.h>
  24
  25 //#define DEBUG
  26 #include "board.h"
  27 #include "debug.h"
  28 #include "pattern.h"
  29 #include "patternsp.h"
  30 #include "playout.h"
  31 #include "playout/elo.h"
  32 #include "random.h"
  33 #include "tactics.h"
  34 #include "uct/prior.h"
  35
  36 #define PLDEBUGL(n) DEBUGL_(p->debug_level, n)
  37
  38
  39 /* Note that the context can be shared by multiple threads! */
  40
/* One pattern-matching setup: which features to look for, how to match
 * them, and the gamma table used to score whatever was matched. */
struct patternset {
        /* Feature selection handed to pattern_match(). */
        pattern_spec ps;
        /* Matcher configuration handed to pattern_match(). */
        struct pattern_config pc;
        /* Gamma table consulted through feature_gamma(). */
        struct features_gamma *fg;
};
  46
  47 struct elo_policy {
  48         float selfatari;
  49         struct patternset choose, assess;
  50         playout_elo_callbackp callback; void *callback_data;
  51 };
  52
  53
  54 /* This is the core of the policy - initializes and constructs the
  55  * probability distribution over the move candidates. */
  56
  57 int
  58 elo_get_probdist(struct playout_policy *p, struct patternset *ps, struct board *b, enum stone to_play, struct probdist *pd)
  59 {
  60         //struct elo_policy *pp = p->data;
  61         int moves = 0;
  62
  63         /* First, assign per-point probabilities. */
  64
  65         for (int f = 0; f < b->flen; f++) {
  66                 struct move m = { .coord = b->f[f], .color = to_play };
  67
  68                 /* Skip pass (for now)? */
  69                 if (is_pass(m.coord)) {
  70 skip_move:
  71                         probdist_set(pd, m.coord, 0);
  72                         continue;
  73                 }
  74                 //fprintf(stderr, "<%d> %s\n", f, coord2sstr(m.coord, b));
  75
  76                 /* Skip invalid moves. */
  77                 if (!board_is_valid_move(b, &m))
  78                         goto skip_move;
  79
  80                 /* We shall never fill our own single-point eyes. */
  81                 /* XXX: In some rare situations, this prunes the best move:
  82                  * Bulk-five nakade with eye at 1-1 point. */
  83                 if (board_is_one_point_eye(b, m.coord, to_play)) {
  84                         goto skip_move;
  85                 }
  86
  87                 moves++;
  88                 /* Each valid move starts with gamma 1. */
  89                 double g = 1.f;
  90
  91                 /* Some easy features: */
  92                 /* XXX: We just disable them for now since we call the
  93                  * pattern matcher; you need the gammas file. */
  94 #if 0
  95                 if (is_bad_selfatari(b, to_play, m.coord))
  96                         g *= pp->selfatari;
  97 #endif
  98
  99                 /* Match pattern features: */
 100                 struct pattern p;
 101                 pattern_match(&ps->pc, ps->ps, &p, b, &m);
 102                 for (int i = 0; i < p.n; i++) {
 103                         /* Multiply together gammas of all pattern features. */
 104                         double gamma = feature_gamma(ps->fg, &p.f[i], NULL);
 105                         //char buf[256] = ""; feature2str(buf, &p.f[i]);
 106                         //fprintf(stderr, "<%d> %s feat %s gamma %f\n", f, coord2sstr(m.coord, b), buf, gamma);
 107                         g *= gamma;
 108                 }
 109
 110                 probdist_set(pd, m.coord, g);
 111                 //fprintf(stderr, "<%d> %s %f (E %f)\n", f, coord2sstr(m.coord, b), probdist_one(pd, m.coord), pd->items[f]);
 112         }
 113
 114         return moves;
 115 }
 116
 117
 118 struct lprobdist {
 119         int n;
 120 #define LPD_MAX 8
 121         coord_t coords[LPD_MAX];
 122         double items[LPD_MAX];
 123         double total;
 124
 125         /* Backups of original totals for restoring. */
 126         double btotal;
 127         double browtotals_v[10];
 128         int browtotals_i[10];
 129         int browtotals_n;
 130 };
 131
 132 #ifdef BOARD_GAMMA
 133
 134 static void
 135 elo_check_probdist(struct playout_policy *p, struct board *b, enum stone to_play, struct probdist *pd, int *ignores, struct lprobdist *lpd, coord_t lc)
 136 {
 137 #if 0
 138         struct elo_policy *pp = p->data;
 139         if (pd->total < PROBDIST_EPSILON)
 140                 return;
 141
 142         /* Compare to the manually created distribution. */
 143         /* XXX: This is now broken if callback is used. */
 144
 145         probdist_alloca(pdx, b);
 146         elo_get_probdist(p, &pp->choose, b, to_play, &pdx);
 147         for (int i = 0; i < b->flen; i++) {
 148                 coord_t c = b->f[i];
 149                 if (is_pass(c)) continue;
 150                 // XXX: Hardcoded ignores[] structure
 151                 if (ignores[0] == c) continue;
 152                 double val = pd->items[c];
 153                 if (!is_pass(lc) && coord_is_8adjecent(lc, c, b))
 154                         for (int j = 0; j < lpd->n; j++)
 155                                 if (lpd->coords[j] == c)
 156                                         val = lpd->items[j];
 157                 if (fabs(pdx.items[c] - val) < PROBDIST_EPSILON)
 158                         continue;
 159                 printf("[%s %d] manual %f board %f ", coord2sstr(c, b), b->pat3[c], pdx.items[c], pd->items[c]);
 160                 board_gamma_update(b, c, to_play);
 161                 printf("plainboard %f\n", pd->items[c]);
 162                 assert(0);
 163         }
 164 #endif
 165 }
 166
/* Pick a playout move for @to_play using the probdist kept on the board
 * (b->prob[]) instead of computing one from scratch.
 *
 * The ko point and the 8-neighbours of the last move are temporarily
 * taken out of the board probdist; the neighbours are put into a small
 * local distribution with their value scaled by the contiguity gamma.
 * One random stab over (local total + board total) then decides which
 * of the two distributions the move comes from. Before returning, the
 * board probdist totals are put back to their original values.
 *
 * Returns the chosen coordinate, or pass when neither distribution has
 * anything left to pick. Aborts if the local pick runs past its items. */
coord_t
playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
{
        struct elo_policy *pp = p->data;
        /* The base board probdist. */
        struct probdist *pd = &b->prob[to_play - 1];
        /* The list of moves we do not consider in pd. */
        /* Room for the ko point, the neighbours of the last move and the
         * terminating pass stored below. */
        int ignores[10]; int ignores_n = 0;
        /* The list of local moves; we consider these separately. */
        /* Note that btotal is captured here, i.e. before the callback
         * below gets a chance to modify pd. */
        struct lprobdist lpd = { .n = 0, .total = 0, .btotal = pd->total, .browtotals_n = 0 };

        /* The engine might want to adjust our probdist. */
        if (pp->callback)
                pp->callback(pp->callback_data, b, to_play, pd);

        if (PLDEBUGL(5)) {
                fprintf(stderr, "pd total pre %lf lpd %lf\n", pd->total, lpd.total);
        }

        /* ignore_move(c_): append c_ to ignores[] (swapping it with its
         * predecessor if that keeps the list sorted), back up the current
         * total of the row c_ lies in, and mute c_ in pd.
         * NOTE(review): probdist_mute() presumably removes the item's
         * value from pd's totals without touching pd->items - confirm. */
#define ignore_move(c_) do { \
        ignores[ignores_n++] = c_; \
        if (ignores_n > 1 && ignores[ignores_n - 1] < ignores[ignores_n - 2]) { \
                /* Keep ignores[] sorted. We abuse the fact that we know \
                 * only one item can be out-of-order. */ \
                coord_t cc = ignores[ignores_n - 2]; \
                ignores[ignores_n - 2] = ignores[ignores_n - 1]; \
                ignores[ignores_n - 1] = cc; \
        } \
        int rowi = coord_y(c_, pd->b); \
        lpd.browtotals_i[lpd.browtotals_n] = rowi; \
        lpd.browtotals_v[lpd.browtotals_n++] = pd->rowtotals[rowi]; \
        probdist_mute(pd, c_); \
        if (PLDEBUGL(6)) \
                fprintf(stderr, "ignored move %s(%lf) => tot pd %lf lpd %lf\n", coord2sstr(c_, pd->b), pd->items[c_], pd->total, lpd.total); \
} while (0)

        /* Make sure ko-prohibited move does not get picked. */
        if (!is_pass(b->ko.coord)) {
                assert(b->ko.color == to_play);
                ignore_move(b->ko.coord);
        }

        /* Contiguity detection. */
        if (!is_pass(b->last_move.coord)) {
                /* The iteration macro provides the neighbour as `c`. */
                foreach_8neighbor(b, b->last_move.coord) {
                        if (c == b->ko.coord)
                                continue; // already ignored
                        ignore_move(c);

                        /* Move the point into the local distribution,
                         * boosted by the contiguity gamma. */
                        double val = probdist_one(pd, c) * b->gamma->gamma[FEAT_CONTIGUITY][1];
                        lpd.coords[lpd.n] = c;
                        lpd.items[lpd.n++] = val;
                        lpd.total += val;
                } foreach_8neighbor_end;
        }

        /* Terminate the ignore list. */
        ignores[ignores_n] = pass;
        if (PLDEBUGL(5))
                fprintf(stderr, "pd total post %lf lpd %lf\n", pd->total, lpd.total);

        /* Verify sanity, possibly. */
        elo_check_probdist(p, b, to_play, pd, ignores, &lpd, b->last_move.coord);

        /* Pick a move. */
        coord_t c = pass;
        /* NOTE(review): assumes fast_frandom() yields a value in [0,1),
         * making stab a point within the combined total - confirm. */
        double stab = fast_frandom() * (lpd.total + pd->total);
        if (PLDEBUGL(5))
                fprintf(stderr, "stab %lf / (%lf + %lf)\n", stab, lpd.total, pd->total);
        if (stab < lpd.total - PROBDIST_EPSILON) {
                /* Local probdist. */
                if (PLDEBUGL(6)) {
                        /* Some debug prints. */
                        double tot = 0;
                        for (int i = 0; i < lpd.n; i++) {
                                tot += lpd.items[i];
                                /* This local `p` shadows the policy
                                 * parameter within the loop body. */
                                struct pattern p;
                                struct move m = { .color = to_play, .coord = lpd.coords[i] };
                                if (board_at(b, m.coord) != S_NONE) {
                                        assert(lpd.items[i] == 0);
                                        continue;
                                }
                                pattern_match(&pp->choose.pc, pp->choose.ps, &p, b, &m);
                                char s[256] = ""; pattern2str(s, &p);
                                fprintf(stderr, "coord %s <%lf> [tot %lf] %s (p3:%d)\n", coord2sstr(lpd.coords[i], b), lpd.items[i], tot, s, pattern3_by_spatial(pp->choose.pc.spat_dict, b->pat3[lpd.coords[i]]));
                        }
                }
                /* Walk the local items, consuming stab, until the item
                 * that contains it is found. */
                for (int i = 0; i < lpd.n; i++) {
                        if (stab <= lpd.items[i]) {
                                c = lpd.coords[i];
                                break;
                        }
                        stab -= lpd.items[i];
                }
                if (is_pass(c)) {
                        fprintf(stderr, "elo: local overstab [%lf]\n", stab);
                        abort();
                }

        } else if (pd->total >= PROBDIST_EPSILON) {
                /* Global probdist. */
                /* XXX: We re-stab inside. */
                c = probdist_pick(pd, ignores);

        } else {
                /* Nothing left to pick from in either distribution. */
                if (PLDEBUGL(5))
                        fprintf(stderr, "ding!\n");
                c = pass;
        }

        /* Repair the damage. */
        if (pp->callback) {
                /* XXX: Do something less horribly inefficient
                 * than just recomputing the whole pd. */
                pd->total = 0;
                for (int i = 0; i < board_size(pd->b); i++)
                        pd->rowtotals[i] = 0;
                for (int i = 0; i < b->flen; i++) {
                        pd->items[b->f[i]] = 0;
                        board_gamma_update(b, b->f[i], to_play);
                }
                /* The rebuilt total must match the one saved before the
                 * callback touched pd. */
                assert(fabs(pd->total - lpd.btotal) < PROBDIST_EPSILON);

        } else {
                pd->total = lpd.btotal;
                /* If we touched a row multiple times (and we sure will),
                 * the latter value is obsolete; but since we go through
                 * the backups in reverse order, all is good. */
                for (int j = lpd.browtotals_n - 1; j >= 0; j--)
                        pd->rowtotals[lpd.browtotals_i[j]] = lpd.browtotals_v[j];
        }
        return c;
}
 299
 300 #else
 301
 302 coord_t
 303 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
 304 {
 305         struct elo_policy *pp = p->data;
 306         probdist_alloca(pd, b);
 307         elo_get_probdist(p, &pp->choose, b, to_play, &pd);
 308         if (pp->callback)
 309                 pp->callback(pp->callback_data, b, to_play, &pd);
 310         if (pd.total < PROBDIST_EPSILON)
 311                 return pass;
 312         int ignores[1] = { pass };
 313         coord_t c = probdist_pick(&pd, ignores);
 314         return c;
 315 }
 316
 317 #endif
 318
 319 void
 320 playout_elo_assess(struct playout_policy *p, struct prior_map *map, int games)
 321 {
 322         struct elo_policy *pp = p->data;
 323         probdist_alloca(pd, map->b);
 324
 325         int moves;
 326         moves = elo_get_probdist(p, &pp->assess, map->b, map->to_play, &pd);
 327
 328         /* It is a question how to transform the gamma to won games; we use
 329          * a naive approach currently, but not sure how well it works. */
 330         /* TODO: Try sqrt(p), atan(p)/pi*2. */
 331
 332         for (int f = 0; f < map->b->flen; f++) {
 333                 coord_t c = map->b->f[f];
 334                 if (!map->consider[c])
 335                         continue;
 336                 add_prior_value(map, c, probdist_one(&pd, c) / probdist_total(&pd), games);
 337         }
 338 }
 339
 340 void
 341 playout_elo_done(struct playout_policy *p)
 342 {
 343         struct elo_policy *pp = p->data;
 344         features_gamma_done(pp->choose.fg);
 345         features_gamma_done(pp->assess.fg);
 346 }
 347
 348
 349 void
 350 playout_elo_callback(struct playout_policy *p, playout_elo_callbackp callback, void *data)
 351 {
 352         struct elo_policy *pp = p->data;
 353         pp->callback = callback;
 354         pp->callback_data = data;
 355 }
 356
 357 struct playout_policy *
 358 playout_elo_init(char *arg, struct board *b)
 359 {
 360         struct playout_policy *p = calloc2(1, sizeof(*p));
 361         struct elo_policy *pp = calloc2(1, sizeof(*pp));
 362         p->data = pp;
 363         p->choose = playout_elo_choose;
 364         p->assess = playout_elo_assess;
 365         p->done = playout_elo_done;
 366
 367         const char *gammafile = features_gamma_filename;
 368         /* Some defaults based on the table in Remi Coulom's paper. */
 369         pp->selfatari = 0.06;
 370
 371         struct pattern_config pc = DEFAULT_PATTERN_CONFIG;
 372         int xspat = -1;
 373         bool precise_selfatari = false;
 374
 375         if (arg) {
 376                 char *optspec, *next = arg;
 377                 while (*next) {
 378                         optspec = next;
 379                         next += strcspn(next, ":");
 380                         if (*next) { *next++ = 0; } else { *next = 0; }
 381
 382                         char *optname = optspec;
 383                         char *optval = strchr(optspec, '=');
 384                         if (optval) *optval++ = 0;
 385
 386                         if (!strcasecmp(optname, "selfatari") && optval) {
 387                                 pp->selfatari = atof(optval);
 388                         } else if (!strcasecmp(optname, "precisesa")) {
 389                                 /* Use precise self-atari detection within
 390                                  * fast patterns. */
 391                                 precise_selfatari = !optval || atoi(optval);
 392                         } else if (!strcasecmp(optname, "gammafile") && optval) {
 393                                 /* patterns.gamma by default. We use this,
 394                                  * and need also ${gammafile}f (e.g.
 395                                  * patterns.gammaf) for fast (MC) features. */
 396                                 gammafile = strdup(optval);
 397                         } else if (!strcasecmp(optname, "xspat") && optval) {
 398                                 /* xspat==0: don't match spatial features
 399                                  * xspat==1: match *only* spatial features */
 400                                 xspat = atoi(optval);
 401                         } else {
 402                                 fprintf(stderr, "playout-elo: Invalid policy argument %s or missing value\n", optname);
 403                                 exit(1);
 404                         }
 405                 }
 406         }
 407
 408         pc.spat_dict = spatial_dict_init(false);
 409
 410         pp->assess.pc = pc;
 411         pp->assess.fg = features_gamma_init(&pp->assess.pc, gammafile);
 412         memcpy(pp->assess.ps, PATTERN_SPEC_MATCHALL, sizeof(pattern_spec));
 413         for (int i = 0; i < FEAT_MAX; i++)
 414                 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
 415                         pp->assess.ps[i] = 0;
 416
 417         /* In playouts, we need to operate with much smaller set of features
 418          * in order to keep reasonable speed. */
 419         /* TODO: Configurable. */ /* TODO: Tune. */
 420         pp->choose.pc = FAST_PATTERN_CONFIG;
 421         pp->choose.pc.spat_dict = pc.spat_dict;
 422         char cgammafile[256]; strcpy(stpcpy(cgammafile, gammafile), "f");
 423         pp->choose.fg = features_gamma_init(&pp->choose.pc, cgammafile);
 424         memcpy(pp->choose.ps, PATTERN_SPEC_MATCHFAST, sizeof(pattern_spec));
 425         for (int i = 0; i < FEAT_MAX; i++)
 426                 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
 427                         pp->choose.ps[i] = 0;
 428         if (precise_selfatari)
 429                 pp->choose.ps[FEAT_SELFATARI] = ~(1<<PF_SELFATARI_STUPID);
 430         board_gamma_set(b, pp->choose.fg, precise_selfatari);
 431
 432         return p;
 433 }