playout/elo.c

   1 /* Playout player based on probability distribution generated over
   2  * the available moves. */
   3
   4 /* We use the ELO-based (Coulom, 2007) approach, where each board
   5  * feature (matched pattern, self-atari, capture, MC owner?, ...)
   6  * is pre-assigned "playing strength" (gamma).
   7  *
   8  * Then, the problem of choosing a move is basically a team
   9  * competition in ELO terms - each spot is represented by a team
  10  * of features appearing there; the team gamma is product of feature
  11  * gammas. The team gammas make for a probability distribution of
  12  * moves to be played.
  13  *
  14  * We use the general pattern classifier that will find the features
  15  * for us, and external datasets that can be harvested from a set
  16  * of game records (see the HACKING file for details): patterns.spat
  17  * as a dictionary of spatial stone configurations, and patterns.gamma
  18  * with strengths of particular features. */
  19
  20 #include <assert.h>
  21 #include <math.h>
  22 #include <stdio.h>
  23 #include <stdlib.h>
  24
  25 //#define DEBUG
  26 #include "board.h"
  27 #include "debug.h"
  28 #include "fixp.h"
  29 #include "pattern.h"
  30 #include "patternsp.h"
  31 #include "playout.h"
  32 #include "playout/elo.h"
  33 #include "random.h"
  34 #include "tactics.h"
  35 #include "uct/prior.h"
  36
  37 #define PLDEBUGL(n) DEBUGL_(p->debug_level, n)
  38
  39
  40 /* Note that the context can be shared by multiple threads! */
  41
  42 struct patternset {
  43         pattern_spec ps;
  44         struct pattern_config pc;
  45         struct features_gamma *fg;
  46 };
  47
  48 struct elo_policy {
  49         float selfatari;
  50         struct patternset choose, assess;
  51         playout_elo_callbackp callback; void *callback_data;
  52 };
  53
  54
  55 /* This is the core of the policy - initializes and constructs the
  56  * probability distribution over the move candidates. */
  57
  58 int
  59 elo_get_probdist(struct playout_policy *p, struct patternset *ps, struct board *b, enum stone to_play, struct probdist *pd)
  60 {
  61         //struct elo_policy *pp = p->data;
  62         int moves = 0;
  63
  64         /* First, assign per-point probabilities. */
  65
  66         for (int f = 0; f < b->flen; f++) {
  67                 struct move m = { .coord = b->f[f], .color = to_play };
  68
  69                 /* Skip pass (for now)? */
  70                 if (is_pass(m.coord)) {
  71 skip_move:
  72                         probdist_set(pd, m.coord, 0);
  73                         continue;
  74                 }
  75                 //fprintf(stderr, "<%d> %s\n", f, coord2sstr(m.coord, b));
  76
  77                 /* Skip invalid moves. */
  78                 if (!board_is_valid_move(b, &m))
  79                         goto skip_move;
  80
  81                 /* We shall never fill our own single-point eyes. */
  82                 /* XXX: In some rare situations, this prunes the best move:
  83                  * Bulk-five nakade with eye at 1-1 point. */
  84                 if (board_is_one_point_eye(b, m.coord, to_play)) {
  85                         goto skip_move;
  86                 }
  87
  88                 moves++;
  89                 /* Each valid move starts with gamma 1. */
  90                 double g = 1.f;
  91
  92                 /* Some easy features: */
  93                 /* XXX: We just disable them for now since we call the
  94                  * pattern matcher; you need the gammas file. */
  95 #if 0
  96                 if (is_bad_selfatari(b, to_play, m.coord))
  97                         g *= pp->selfatari;
  98 #endif
  99
 100                 /* Match pattern features: */
 101                 struct pattern p;
 102                 pattern_match(&ps->pc, ps->ps, &p, b, &m);
 103                 for (int i = 0; i < p.n; i++) {
 104                         /* Multiply together gammas of all pattern features. */
 105                         double gamma = feature_gamma(ps->fg, &p.f[i], NULL);
 106                         //char buf[256] = ""; feature2str(buf, &p.f[i]);
 107                         //fprintf(stderr, "<%d> %s feat %s gamma %f\n", f, coord2sstr(m.coord, b), buf, gamma);
 108                         g *= gamma;
 109                 }
 110
 111                 probdist_set(pd, m.coord, double_to_fixp(g));
 112                 //fprintf(stderr, "<%d> %s %f (E %f)\n", f, coord2sstr(m.coord, b), fixp_to_double(probdist_one(pd, m.coord)), pd->items[f]);
 113         }
 114
 115         return moves;
 116 }
 117
 118
 119 struct lprobdist {
 120         int n;
 121 #define LPD_MAX 8
 122         coord_t coords[LPD_MAX];
 123         fixp_t items[LPD_MAX];
 124         fixp_t total;
 125
 126         /* Backups of original totals for restoring. */
 127         fixp_t btotal;
 128         fixp_t browtotals_v[10];
 129         int browtotals_i[10];
 130         int browtotals_n;
 131 };
 132
 133 #ifdef BOARD_GAMMA
 134
 135 static void
 136 elo_check_probdist(struct playout_policy *p, struct board *b, enum stone to_play, struct probdist *pd, int *ignores, struct lprobdist *lpd, coord_t lc)
 137 {
 138 #if 0
 139 #define PROBDIST_EPSILON double_to_fixp(0.01)
 140         struct elo_policy *pp = p->data;
 141         if (pd->total == 0)
 142                 return;
 143
 144         /* Compare to the manually created distribution. */
 145         /* XXX: This is now broken if callback is used. */
 146
 147         probdist_alloca(pdx, b);
 148         elo_get_probdist(p, &pp->choose, b, to_play, &pdx);
 149         for (int i = 0; i < b->flen; i++) {
 150                 coord_t c = b->f[i];
 151                 if (is_pass(c)) continue;
 152                 if (c == b->ko.coord) continue;
 153                 fixp_t val = pd->items[c];
 154                 if (!is_pass(lc) && coord_is_8adjecent(lc, c, b))
 155                         for (int j = 0; j < lpd->n; j++)
 156                                 if (lpd->coords[j] == c) {
 157                                         val = lpd->items[j];
 158                                         probdist_mute(&pdx, c);
 159
 160                                 }
 161                 if (abs(pdx.items[c] - val) < PROBDIST_EPSILON)
 162                         continue;
 163                 printf("[%s %d] manual %f board %f (base %f) ", coord2sstr(c, b), b->pat3[c], fixp_to_double(pdx.items[c]), fixp_to_double(val), fixp_to_double(pd->items[c]));
 164                 board_gamma_update(b, c, to_play);
 165                 printf("plainboard %f\n", fixp_to_double(pd->items[c]));
 166                 assert(0);
 167         }
 168         for (int r = 0; r < board_size(b); r++) {
 169                 if (abs(pdx.rowtotals[r] - pd->rowtotals[r]) < PROBDIST_EPSILON)
 170                         continue;
 171                 fprintf(stderr, "row %d: manual %f board %f\n", r, fixp_to_double(pdx.rowtotals[r]), fixp_to_double(pd->rowtotals[r]));
 172                 assert(0);
 173         }
 174         assert(abs(pdx.total - pd->total) < PROBDIST_EPSILON);
 175 #undef PROBDIST_EPSILON
 176 #endif
 177 }
 178
 179 coord_t
 180 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
 181 {
 182         struct elo_policy *pp = p->data;
 183         /* The base board probdist. */
 184         struct probdist *pd = &b->prob[to_play - 1];
 185         /* The list of moves we do not consider in pd. */
 186         int ignores[10]; int ignores_n = 0;
 187         /* The list of local moves; we consider these separately. */
 188         struct lprobdist lpd = { .n = 0, .total = 0, .btotal = pd->total, .browtotals_n = 0 };
 189
 190         /* The engine might want to adjust our probdist. */
 191         if (pp->callback)
 192                 pp->callback(pp->callback_data, b, to_play, pd);
 193
 194         if (PLDEBUGL(5)) {
 195                 fprintf(stderr, "pd total pre %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total));
 196         }
 197
 198 #define ignore_move(c_) do { \
 199         ignores[ignores_n++] = c_; \
 200         if (ignores_n > 1 && ignores[ignores_n - 1] < ignores[ignores_n - 2]) { \
 201                 /* Keep ignores[] sorted. We abuse the fact that we know \
 202                  * only one item can be out-of-order. */ \
 203                 coord_t cc = ignores[ignores_n - 2]; \
 204                 ignores[ignores_n - 2] = ignores[ignores_n - 1]; \
 205                 ignores[ignores_n - 1] = cc; \
 206         } \
 207         int rowi = coord_y(c_, pd->b); \
 208         lpd.browtotals_i[lpd.browtotals_n] = rowi; \
 209         lpd.browtotals_v[lpd.browtotals_n++] = pd->rowtotals[rowi]; \
 210         probdist_mute(pd, c_); \
 211         if (PLDEBUGL(6)) \
 212                 fprintf(stderr, "ignored move %s(%f) => tot pd %f lpd %f\n", coord2sstr(c_, pd->b), fixp_to_double(pd->items[c_]), fixp_to_double(pd->total), fixp_to_double(lpd.total)); \
 213 } while (0)
 214
 215         /* Make sure ko-prohibited move does not get picked. */
 216         if (!is_pass(b->ko.coord)) {
 217                 assert(b->ko.color == to_play);
 218                 ignore_move(b->ko.coord);
 219         }
 220
 221         /* Contiguity detection. */
 222         if (!is_pass(b->last_move.coord)) {
 223                 foreach_8neighbor(b, b->last_move.coord) {
 224                         if (c == b->ko.coord)
 225                                 continue; // already ignored
 226                         if (board_at(b, c) != S_NONE) {
 227                                 assert(probdist_one(pd, c) == 0);
 228                                 continue;
 229                         }
 230                         ignore_move(c);
 231
 232                         fixp_t val = double_to_fixp(fixp_to_double(probdist_one(pd, c)) * b->gamma->gamma[FEAT_CONTIGUITY][1]);
 233                         lpd.coords[lpd.n] = c;
 234                         lpd.items[lpd.n++] = val;
 235                         lpd.total += val;
 236                 } foreach_8neighbor_end;
 237         }
 238
 239         ignores[ignores_n] = pass;
 240         if (PLDEBUGL(5))
 241                 fprintf(stderr, "pd total post %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total));
 242
 243         /* Verify sanity, possibly. */
 244         elo_check_probdist(p, b, to_play, pd, ignores, &lpd, b->last_move.coord);
 245
 246         /* Pick a move. */
 247         coord_t c = pass;
 248         fixp_t stab = fast_irandom(lpd.total + pd->total);
 249         if (PLDEBUGL(5))
 250                 fprintf(stderr, "stab %f / (%f + %f)\n", fixp_to_double(stab), fixp_to_double(lpd.total), fixp_to_double(pd->total));
 251         if (stab < lpd.total) {
 252                 /* Local probdist. */
 253                 if (PLDEBUGL(6)) {
 254                         /* Some debug prints. */
 255                         fixp_t tot = 0;
 256                         for (int i = 0; i < lpd.n; i++) {
 257                                 tot += lpd.items[i];
 258                                 struct pattern p;
 259                                 struct move m = { .color = to_play, .coord = lpd.coords[i] };
 260                                 if (board_at(b, m.coord) != S_NONE) {
 261                                         assert(lpd.items[i] == 0);
 262                                         continue;
 263                                 }
 264                                 pattern_match(&pp->choose.pc, pp->choose.ps, &p, b, &m);
 265                                 char s[256] = ""; pattern2str(s, &p);
 266                                 fprintf(stderr, "coord %s <%f> [tot %f] %s (p3:%d)\n",
 267                                         coord2sstr(lpd.coords[i], b), fixp_to_double(lpd.items[i]),
 268                                         fixp_to_double(tot), s,
 269                                         pattern3_by_spatial(pp->choose.pc.spat_dict, b->pat3[lpd.coords[i]]));
 270                         }
 271                 }
 272                 for (int i = 0; i < lpd.n; i++) {
 273                         if (stab <= lpd.items[i]) {
 274                                 c = lpd.coords[i];
 275                                 break;
 276                         }
 277                         stab -= lpd.items[i];
 278                 }
 279                 if (is_pass(c)) {
 280                         fprintf(stderr, "elo: local overstab [%f]\n", fixp_to_double(stab));
 281                         abort();
 282                 }
 283
 284         } else if (pd->total > 0) {
 285                 /* Global probdist. */
 286                 /* XXX: We re-stab inside. */
 287                 c = probdist_pick(pd, ignores);
 288
 289         } else {
 290                 if (PLDEBUGL(5))
 291                         fprintf(stderr, "ding!\n");
 292                 c = pass;
 293         }
 294
 295         /* Repair the damage. */
 296         if (pp->callback) {
 297                 /* XXX: Do something less horribly inefficient
 298                  * than just recomputing the whole pd. */
 299                 pd->total = 0;
 300                 for (int i = 0; i < board_size(pd->b); i++)
 301                         pd->rowtotals[i] = 0;
 302                 for (int i = 0; i < b->flen; i++) {
 303                         pd->items[b->f[i]] = 0;
 304                         board_gamma_update(b, b->f[i], to_play);
 305                 }
 306                 assert(pd->total == lpd.btotal);
 307
 308         } else {
 309                 pd->total = lpd.btotal;
 310                 /* If we touched a row multiple times (and we sure will),
 311                  * the latter value is obsolete; but since we go through
 312                  * the backups in reverse order, all is good. */
 313                 for (int j = lpd.browtotals_n - 1; j >= 0; j--)
 314                         pd->rowtotals[lpd.browtotals_i[j]] = lpd.browtotals_v[j];
 315         }
 316         return c;
 317 }
 318
 319 #else
 320
 321 coord_t
 322 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
 323 {
 324         struct elo_policy *pp = p->data;
 325         probdist_alloca(pd, b);
 326         elo_get_probdist(p, &pp->choose, b, to_play, &pd);
 327         if (pp->callback)
 328                 pp->callback(pp->callback_data, b, to_play, &pd);
 329         if (pd.total == 0)
 330                 return pass;
 331         int ignores[1] = { pass };
 332         coord_t c = probdist_pick(&pd, ignores);
 333         return c;
 334 }
 335
 336 #endif
 337
 338 void
 339 playout_elo_assess(struct playout_policy *p, struct prior_map *map, int games)
 340 {
 341         struct elo_policy *pp = p->data;
 342         probdist_alloca(pd, map->b);
 343
 344         int moves;
 345         moves = elo_get_probdist(p, &pp->assess, map->b, map->to_play, &pd);
 346
 347         /* It is a question how to transform the gamma to won games; we use
 348          * a naive approach currently, but not sure how well it works. */
 349         /* TODO: Try sqrt(p), atan(p)/pi*2. */
 350
 351         for (int f = 0; f < map->b->flen; f++) {
 352                 coord_t c = map->b->f[f];
 353                 if (!map->consider[c])
 354                         continue;
 355                 add_prior_value(map, c, fixp_to_double(probdist_one(&pd, c)) / fixp_to_double(probdist_total(&pd)), games);
 356         }
 357 }
 358
 359 void
 360 playout_elo_done(struct playout_policy *p)
 361 {
 362         struct elo_policy *pp = p->data;
 363         features_gamma_done(pp->choose.fg);
 364         features_gamma_done(pp->assess.fg);
 365 }
 366
 367
 368 void
 369 playout_elo_callback(struct playout_policy *p, playout_elo_callbackp callback, void *data)
 370 {
 371         struct elo_policy *pp = p->data;
 372         pp->callback = callback;
 373         pp->callback_data = data;
 374 }
 375
 376 struct playout_policy *
 377 playout_elo_init(char *arg, struct board *b)
 378 {
 379         struct playout_policy *p = calloc2(1, sizeof(*p));
 380         struct elo_policy *pp = calloc2(1, sizeof(*pp));
 381         p->data = pp;
 382         p->choose = playout_elo_choose;
 383         p->assess = playout_elo_assess;
 384         p->done = playout_elo_done;
 385
 386         const char *gammafile = features_gamma_filename;
 387         /* Some defaults based on the table in Remi Coulom's paper. */
 388         pp->selfatari = 0.06;
 389
 390         struct pattern_config pc = DEFAULT_PATTERN_CONFIG;
 391         int xspat = -1;
 392         bool precise_selfatari = false;
 393
 394         if (arg) {
 395                 char *optspec, *next = arg;
 396                 while (*next) {
 397                         optspec = next;
 398                         next += strcspn(next, ":");
 399                         if (*next) { *next++ = 0; } else { *next = 0; }
 400
 401                         char *optname = optspec;
 402                         char *optval = strchr(optspec, '=');
 403                         if (optval) *optval++ = 0;
 404
 405                         if (!strcasecmp(optname, "selfatari") && optval) {
 406                                 pp->selfatari = atof(optval);
 407                         } else if (!strcasecmp(optname, "precisesa")) {
 408                                 /* Use precise self-atari detection within
 409                                  * fast patterns. */
 410                                 precise_selfatari = !optval || atoi(optval);
 411                         } else if (!strcasecmp(optname, "gammafile") && optval) {
 412                                 /* patterns.gamma by default. We use this,
 413                                  * and need also ${gammafile}f (e.g.
 414                                  * patterns.gammaf) for fast (MC) features. */
 415                                 gammafile = strdup(optval);
 416                         } else if (!strcasecmp(optname, "xspat") && optval) {
 417                                 /* xspat==0: don't match spatial features
 418                                  * xspat==1: match *only* spatial features */
 419                                 xspat = atoi(optval);
 420                         } else {
 421                                 fprintf(stderr, "playout-elo: Invalid policy argument %s or missing value\n", optname);
 422                                 exit(1);
 423                         }
 424                 }
 425         }
 426
 427         pc.spat_dict = spatial_dict_init(false);
 428
 429         pp->assess.pc = pc;
 430         pp->assess.fg = features_gamma_init(&pp->assess.pc, gammafile);
 431         memcpy(pp->assess.ps, PATTERN_SPEC_MATCHALL, sizeof(pattern_spec));
 432         for (int i = 0; i < FEAT_MAX; i++)
 433                 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
 434                         pp->assess.ps[i] = 0;
 435
 436         /* In playouts, we need to operate with much smaller set of features
 437          * in order to keep reasonable speed. */
 438         /* TODO: Configurable. */ /* TODO: Tune. */
 439         pp->choose.pc = FAST_PATTERN_CONFIG;
 440         pp->choose.pc.spat_dict = pc.spat_dict;
 441         char cgammafile[256]; strcpy(stpcpy(cgammafile, gammafile), "f");
 442         pp->choose.fg = features_gamma_init(&pp->choose.pc, cgammafile);
 443         memcpy(pp->choose.ps, PATTERN_SPEC_MATCHFAST, sizeof(pattern_spec));
 444         for (int i = 0; i < FEAT_MAX; i++)
 445                 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
 446                         pp->choose.ps[i] = 0;
 447         if (precise_selfatari)
 448                 pp->choose.ps[FEAT_SELFATARI] = ~(1<<PF_SELFATARI_STUPID);
 449         board_gamma_set(b, pp->choose.fg, precise_selfatari);
 450
 451         return p;
 452 }