playout/elo.c

   1 /* Playout player based on probability distribution generated over
   2  * the available moves. */
   3
   4 /* We use the ELO-based (Coulom, 2007) approach, where each board
   5  * feature (matched pattern, self-atari, capture, MC owner?, ...)
   6  * is pre-assigned "playing strength" (gamma).
   7  *
   8  * Then, the problem of choosing a move is basically a team
   9  * competition in ELO terms - each spot is represented by a team
  10  * of features appearing there; the team gamma is product of feature
  11  * gammas. The team gammas make for a probability distribution of
  12  * moves to be played.
  13  *
  14  * We use the general pattern classifier that will find the features
  15  * for us, and external datasets that can be harvested from a set
  16  * of game records (see the HACKING file for details): patterns.spat
  17  * as a dictionary of spatial stone configurations, and patterns.gamma
  18  * with strengths of particular features. */
  19
  20 /* TODO: Counter-captures are not appreciated properly. This seems to be
  21  * a major strength problem. */
  22
  23 #include <assert.h>
  24 #include <math.h>
  25 #include <stdio.h>
  26 #include <stdlib.h>
  27
  28 //#define DEBUG
  29 #include "board.h"
  30 #include "debug.h"
  31 #include "fixp.h"
  32 #include "pattern.h"
  33 #include "patternsp.h"
  34 #include "playout.h"
  35 #include "playout/elo.h"
  36 #include "random.h"
  37 #include "uct/prior.h"
  38
  39 #define PLDEBUGL(n) DEBUGL_(p->debug_level, n)
  40
  41
  42 /* Note that the context can be shared by multiple threads! */
  43
  44 struct patternset {
  45         pattern_spec ps;
  46         struct pattern_config pc;
  47         struct features_gamma *fg;
  48 };
  49
  50 struct elo_policy {
  51         bool assess_fastpat;
  52         float selfatari;
  53         struct patternset choose, assess;
  54         playout_elo_callbackp callback; void *callback_data;
  55
  56         enum {
  57                 EAV_TOTAL,
  58                 EAV_BEST,
  59         } assess_eval;
  60         enum {
  61                 EAT_LINEAR,
  62                 EAT_ATAN,
  63                 EAT_SIGMOID,
  64         } assess_transform;
  65         double assess_sigmb;
  66 };
  67
  68
  69 /* This is the core of the policy - initializes and constructs the
  70  * probability distribution over the move candidates. */
  71
  72 int
  73 elo_get_probdist(struct playout_policy *p, struct patternset *ps, struct board *b, enum stone to_play, struct probdist *pd)
  74 {
  75         //struct elo_policy *pp = p->data;
  76         int moves = 0;
  77
  78         /* First, assign per-point probabilities. */
  79
  80         for (int f = 0; f < b->flen; f++) {
  81                 struct move m = { .coord = b->f[f], .color = to_play };
  82
  83                 /* Skip pass (for now)? */
  84                 if (is_pass(m.coord)) {
  85 skip_move:
  86                         probdist_set(pd, m.coord, 0);
  87                         continue;
  88                 }
  89                 if (PLDEBUGL(7))
  90                         fprintf(stderr, "<%d> %s\n", f, coord2sstr(m.coord, b));
  91
  92                 /* Skip invalid moves. */
  93                 if (!board_is_valid_move(b, &m))
  94                         goto skip_move;
  95
  96                 /* We shall never fill our own single-point eyes. */
  97                 /* XXX: In some rare situations, this prunes the best move:
  98                  * Bulk-five nakade with eye at 1-1 point. */
  99                 if (board_is_one_point_eye(b, m.coord, to_play)) {
 100                         goto skip_move;
 101                 }
 102
 103                 moves++;
 104                 /* Each valid move starts with gamma 1. */
 105                 double g = 1.f;
 106
 107                 /* Some easy features: */
 108                 /* XXX: We just disable them for now since we call the
 109                  * pattern matcher; you need the gammas file. */
 110 #if 0
 111                 if (is_bad_selfatari(b, to_play, m.coord))
 112                         g *= pp->selfatari;
 113 #endif
 114
 115                 /* Match pattern features: */
 116                 struct pattern pat;
 117                 pattern_match(&ps->pc, ps->ps, &pat, b, &m);
 118                 for (int i = 0; i < pat.n; i++) {
 119                         /* Multiply together gammas of all pattern features. */
 120                         double gamma = feature_gamma(ps->fg, &pat.f[i], NULL);
 121                         if (PLDEBUGL(7)) {
 122                                 char buf[256] = ""; feature2str(buf, &pat.f[i]);
 123                                 fprintf(stderr, "<%d> %s feat %s gamma %f\n", f, coord2sstr(m.coord, b), buf, gamma);
 124                         }
 125                         g *= gamma;
 126                 }
 127
 128                 probdist_set(pd, m.coord, double_to_fixp(g));
 129                 if (PLDEBUGL(7))
 130                         fprintf(stderr, "<%d> %s %f (E %f)\n", f, coord2sstr(m.coord, b), fixp_to_double(probdist_one(pd, m.coord)), g);
 131         }
 132
 133         return moves;
 134 }
 135
 136
 137 struct lprobdist {
 138         int n;
 139 #define LPD_MAX 8
 140         coord_t coords[LPD_MAX];
 141         fixp_t items[LPD_MAX];
 142         fixp_t total;
 143
 144         /* Backups of original totals for restoring. */
 145         fixp_t btotal;
 146         fixp_t browtotals_v[10];
 147         int browtotals_i[10];
 148         int browtotals_n;
 149 };
 150
 151 #if defined(BOARD_GAMMA) && defined(BOARD_TRAITS)
 152
 153 static void
 154 elo_check_probdist(struct playout_policy *p, struct board *b, enum stone to_play, struct probdist *pd, int *ignores, struct lprobdist *lpd, coord_t lc)
 155 {
 156 #if 0
 157 #define PROBDIST_EPSILON double_to_fixp(0.01)
 158         struct elo_policy *pp = p->data;
 159         if (pd->total == 0)
 160                 return;
 161
 162         /* Compare to the manually created distribution. */
 163         /* XXX: This is now broken if callback is used. */
 164
 165         probdist_alloca(pdx, b);
 166         elo_get_probdist(p, &pp->choose, b, to_play, &pdx);
 167         for (int i = 0; i < b->flen; i++) {
 168                 coord_t c = b->f[i];
 169                 if (is_pass(c)) continue;
 170                 if (c == b->ko.coord) continue;
 171                 fixp_t val = pd->items[c];
 172                 if (!is_pass(lc) && coord_is_8adjecent(lc, c, b))
 173                         for (int j = 0; j < lpd->n; j++)
 174                                 if (lpd->coords[j] == c) {
 175                                         val = lpd->items[j];
 176                                         probdist_mute(&pdx, c);
 177
 178                                 }
 179                 if (abs(pdx.items[c] - val) < PROBDIST_EPSILON)
 180                         continue;
 181                 printf("[%s %d] manual %f board %f (base %f) ", coord2sstr(c, b), b->pat3[c], fixp_to_double(pdx.items[c]), fixp_to_double(val), fixp_to_double(pd->items[c]));
 182                 board_gamma_update(b, c, to_play);
 183                 printf("plainboard %f\n", fixp_to_double(pd->items[c]));
 184                 assert(0);
 185         }
 186         for (int r = 0; r < board_size(b); r++) {
 187                 if (abs(pdx.rowtotals[r] - pd->rowtotals[r]) < PROBDIST_EPSILON)
 188                         continue;
 189                 fprintf(stderr, "row %d: manual %f board %f\n", r, fixp_to_double(pdx.rowtotals[r]), fixp_to_double(pd->rowtotals[r]));
 190                 assert(0);
 191         }
 192         assert(abs(pdx.total - pd->total) < PROBDIST_EPSILON);
 193 #undef PROBDIST_EPSILON
 194 #endif
 195 }
 196
 197 coord_t
 198 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
 199 {
 200         struct elo_policy *pp = p->data;
 201         /* The base board probdist. */
 202         struct probdist *pd = &b->prob[to_play - 1];
 203         /* The list of moves we do not consider in pd. */
 204         int ignores[10]; int ignores_n = 0;
 205         /* The list of local moves; we consider these separately. */
 206         struct lprobdist lpd = { .n = 0, .total = 0, .btotal = pd->total, .browtotals_n = 0 };
 207
 208         /* The engine might want to adjust our probdist. */
 209         if (pp->callback)
 210                 pp->callback(pp->callback_data, b, to_play, pd);
 211
 212         if (PLDEBUGL(5)) {
 213                 fprintf(stderr, "pd total pre %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total));
 214         }
 215
 216 #define ignore_move(c_) do { \
 217         ignores[ignores_n++] = c_; \
 218         if (ignores_n > 1 && ignores[ignores_n - 1] < ignores[ignores_n - 2]) { \
 219                 /* Keep ignores[] sorted. We abuse the fact that we know \
 220                  * only one item can be out-of-order. */ \
 221                 coord_t cc = ignores[ignores_n - 2]; \
 222                 ignores[ignores_n - 2] = ignores[ignores_n - 1]; \
 223                 ignores[ignores_n - 1] = cc; \
 224         } \
 225         int rowi = coord_y(c_, pd->b); \
 226         lpd.browtotals_i[lpd.browtotals_n] = rowi; \
 227         lpd.browtotals_v[lpd.browtotals_n++] = pd->rowtotals[rowi]; \
 228         probdist_mute(pd, c_); \
 229         if (PLDEBUGL(6)) \
 230                 fprintf(stderr, "ignored move %s(%f) => tot pd %f lpd %f\n", coord2sstr(c_, pd->b), fixp_to_double(pd->items[c_]), fixp_to_double(pd->total), fixp_to_double(lpd.total)); \
 231 } while (0)
 232
 233         /* Make sure ko-prohibited move does not get picked. */
 234         if (!is_pass(b->ko.coord)) {
 235                 assert(b->ko.color == to_play);
 236                 ignore_move(b->ko.coord);
 237         }
 238
 239         /* Contiguity detection. */
 240         if (!is_pass(b->last_move.coord)) {
 241                 foreach_8neighbor(b, b->last_move.coord) {
 242                         if (c == b->ko.coord)
 243                                 continue; // already ignored
 244                         if (board_at(b, c) != S_NONE) {
 245                                 assert(probdist_one(pd, c) == 0);
 246                                 continue;
 247                         }
 248                         ignore_move(c);
 249
 250                         fixp_t val = double_to_fixp(fixp_to_double(probdist_one(pd, c)) * b->gamma->gamma[FEAT_CONTIGUITY][1]);
 251                         lpd.coords[lpd.n] = c;
 252                         lpd.items[lpd.n++] = val;
 253                         lpd.total += val;
 254                 } foreach_8neighbor_end;
 255         }
 256
 257         ignores[ignores_n] = pass;
 258         if (PLDEBUGL(5))
 259                 fprintf(stderr, "pd total post %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total));
 260
 261         /* Verify sanity, possibly. */
 262         elo_check_probdist(p, b, to_play, pd, ignores, &lpd, b->last_move.coord);
 263
 264         /* Pick a move. */
 265         coord_t c = pass;
 266         fixp_t stab = fast_irandom(lpd.total + pd->total);
 267         if (PLDEBUGL(5))
 268                 fprintf(stderr, "stab %f / (%f + %f)\n", fixp_to_double(stab), fixp_to_double(lpd.total), fixp_to_double(pd->total));
 269         if (stab < lpd.total) {
 270                 /* Local probdist. */
 271                 if (PLDEBUGL(6)) {
 272                         /* Some debug prints. */
 273                         fixp_t tot = 0;
 274                         for (int i = 0; i < lpd.n; i++) {
 275                                 tot += lpd.items[i];
 276                                 struct pattern p;
 277                                 struct move m = { .color = to_play, .coord = lpd.coords[i] };
 278                                 if (board_at(b, m.coord) != S_NONE) {
 279                                         assert(lpd.items[i] == 0);
 280                                         continue;
 281                                 }
 282                                 pattern_match(&pp->choose.pc, pp->choose.ps, &p, b, &m);
 283                                 char s[256] = ""; pattern2str(s, &p);
 284                                 fprintf(stderr, "coord %s <%f> [tot %f] %s (p3:%d)\n",
 285                                         coord2sstr(lpd.coords[i], b), fixp_to_double(lpd.items[i]),
 286                                         fixp_to_double(tot), s,
 287                                         pattern3_by_spatial(pp->choose.pc.spat_dict, b->pat3[lpd.coords[i]]));
 288                         }
 289                 }
 290                 for (int i = 0; i < lpd.n; i++) {
 291                         if (stab <= lpd.items[i]) {
 292                                 c = lpd.coords[i];
 293                                 break;
 294                         }
 295                         stab -= lpd.items[i];
 296                 }
 297                 if (is_pass(c)) {
 298                         fprintf(stderr, "elo: local overstab [%f]\n", fixp_to_double(stab));
 299                         abort();
 300                 }
 301
 302         } else if (pd->total > 0) {
 303                 /* Global probdist. */
 304                 /* XXX: We re-stab inside. */
 305                 c = probdist_pick(pd, ignores);
 306
 307         } else {
 308                 if (PLDEBUGL(5))
 309                         fprintf(stderr, "ding!\n");
 310                 c = pass;
 311         }
 312
 313         /* Repair the damage. */
 314         if (pp->callback) {
 315                 /* XXX: Do something less horribly inefficient
 316                  * than just recomputing the whole pd. */
 317                 pd->total = 0;
 318                 for (int i = 0; i < board_size(pd->b); i++)
 319                         pd->rowtotals[i] = 0;
 320                 for (int i = 0; i < b->flen; i++) {
 321                         pd->items[b->f[i]] = 0;
 322                         board_gamma_update(b, b->f[i], to_play);
 323                 }
 324                 assert(pd->total == lpd.btotal);
 325
 326         } else {
 327                 pd->total = lpd.btotal;
 328                 /* If we touched a row multiple times (and we sure will),
 329                  * the latter value is obsolete; but since we go through
 330                  * the backups in reverse order, all is good. */
 331                 for (int j = lpd.browtotals_n - 1; j >= 0; j--)
 332                         pd->rowtotals[lpd.browtotals_i[j]] = lpd.browtotals_v[j];
 333         }
 334         return c;
 335 }
 336
 337 #else
 338
 339 coord_t
 340 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
 341 {
 342         struct elo_policy *pp = p->data;
 343         probdist_alloca(pd, b);
 344         elo_get_probdist(p, &pp->choose, b, to_play, &pd);
 345         if (pp->callback)
 346                 pp->callback(pp->callback_data, b, to_play, &pd);
 347         if (pd.total == 0)
 348                 return pass;
 349         int ignores[1] = { pass };
 350         coord_t c = probdist_pick(&pd, ignores);
 351         return c;
 352 }
 353
 354 #endif
 355
 356 void
 357 playout_elo_assess(struct playout_policy *p, struct prior_map *map, int games)
 358 {
 359         struct elo_policy *pp = p->data;
 360         probdist_alloca(pd, map->b);
 361
 362         int moves;
 363         moves = elo_get_probdist(p, &pp->assess, map->b, map->to_play, &pd);
 364
 365         /* It is a question how to transform the gamma to won games; we use
 366          * a naive approach currently, but not sure how well it works. */
 367         /* TODO: Try sqrt(p), atan(p)/pi*2. */
 368
 369         double pd_best = 0;
 370         if (pp->assess_eval == EAV_BEST) {
 371                 for (int f = 0; f < map->b->flen; f++) {
 372                         double pd_one = fixp_to_double(probdist_one(&pd, map->b->f[f]));
 373                         if (pd_one > pd_best)
 374                                 pd_best = pd_one;
 375                 }
 376         }
 377         double pd_total = fixp_to_double(probdist_total(&pd));
 378
 379         for (int f = 0; f < map->b->flen; f++) {
 380                 coord_t c = map->b->f[f];
 381                 if (!map->consider[c])
 382                         continue;
 383
 384                 double pd_one = fixp_to_double(probdist_one(&pd, c));
 385                 double val = 0;
 386                 switch (pp->assess_eval) {
 387                 case EAV_TOTAL:
 388                         val = pd_one / pd_total;
 389                         break;
 390                 case EAV_BEST:
 391                         val = pd_one / pd_best;
 392                         break;
 393                 default:
 394                         assert(0);
 395                 }
 396
 397                 switch (pp->assess_transform) {
 398                 case EAT_LINEAR:
 399                         val = val;
 400                         break;
 401                 case EAT_ATAN:
 402                         val = atan(val)/M_PI;
 403                         break;
 404                 case EAT_SIGMOID:
 405                         val = 1.0 / (1.0 + exp(-pp->assess_sigmb * (val - 0.5)));
 406                         break;
 407                 default:
 408                         assert(0);
 409                 }
 410
 411                 add_prior_value(map, c, val, games);
 412         }
 413 }
 414
 415 void
 416 playout_elo_done(struct playout_policy *p)
 417 {
 418         struct elo_policy *pp = p->data;
 419         features_gamma_done(pp->choose.fg);
 420         features_gamma_done(pp->assess.fg);
 421 }
 422
 423
 424 void
 425 playout_elo_callback(struct playout_policy *p, playout_elo_callbackp callback, void *data)
 426 {
 427         struct elo_policy *pp = p->data;
 428         pp->callback = callback;
 429         pp->callback_data = data;
 430 }
 431
 432 struct playout_policy *
 433 playout_elo_init(char *arg, struct board *b)
 434 {
 435         struct playout_policy *p = calloc2(1, sizeof(*p));
 436         struct elo_policy *pp = calloc2(1, sizeof(*pp));
 437         p->data = pp;
 438         p->choose = playout_elo_choose;
 439         p->assess = playout_elo_assess;
 440         p->done = playout_elo_done;
 441
 442         const char *gammafile = features_gamma_filename;
 443         pp->assess_sigmb = 10.0;
 444         /* Some defaults based on the table in Remi Coulom's paper. */
 445         pp->selfatari = 0.06;
 446
 447         struct pattern_config pc = DEFAULT_PATTERN_CONFIG;
 448         int xspat = -1;
 449         bool precise_selfatari = false;
 450
 451         if (arg) {
 452                 char *optspec, *next = arg;
 453                 while (*next) {
 454                         optspec = next;
 455                         next += strcspn(next, ":");
 456                         if (*next) { *next++ = 0; } else { *next = 0; }
 457
 458                         char *optname = optspec;
 459                         char *optval = strchr(optspec, '=');
 460                         if (optval) *optval++ = 0;
 461
 462                         if (!strcasecmp(optname, "selfatari") && optval) {
 463                                 pp->selfatari = atof(optval);
 464                         } else if (!strcasecmp(optname, "precisesa")) {
 465                                 /* Use precise self-atari detection within
 466                                  * fast patterns. */
 467                                 precise_selfatari = !optval || atoi(optval);
 468                         } else if (!strcasecmp(optname, "gammafile") && optval) {
 469                                 /* patterns.gamma by default. We use this,
 470                                  * and need also ${gammafile}f (e.g.
 471                                  * patterns.gammaf) for fast (MC) features. */
 472                                 gammafile = strdup(optval);
 473                         } else if (!strcasecmp(optname, "xspat") && optval) {
 474                                 /* xspat==0: don't match spatial features
 475                                  * xspat==1: match *only* spatial features */
 476                                 xspat = atoi(optval);
 477                         } else if (!strcasecmp(optname, "assess_fastpat")) {
 478                                 /* Use just fast pattern set even for the
 479                                  * node prior value assessment. */
 480                                 pp->assess_fastpat = !optval || atoi(optval);
 481                         } else if (!strcasecmp(optname, "assess_sigmb") && optval) {
 482                                 pp->assess_sigmb = atof(optval);
 483                         } else if (!strcasecmp(optname, "assess_eval") && optval) {
 484                                 /* Evaluation method for prior node value
 485                                  * assessment. */
 486                                 if (!strcasecmp(optval, "total")) {
 487                                         /* Proportion prob/totprob. */
 488                                         pp->assess_eval = EAV_TOTAL;
 489                                 } else if (!strcasecmp(optval, "best")) {
 490                                         /* Proportion prob/bestprob. */
 491                                         pp->assess_eval = EAV_BEST;
 492                                 } else {
 493                                         fprintf(stderr, "playout-elo: Invalid eval mode %s\n", optval);
 494                                         exit(1);
 495                                 }
 496                         } else if (!strcasecmp(optname, "assess_transform") && optval) {
 497                                 /* Transformation of evaluation for prior
 498                                  * node value assessment. */
 499                                 if (!strcasecmp(optval, "linear")) {
 500                                         /* No additional transformation. */
 501                                         pp->assess_transform = EAT_LINEAR;
 502                                 } else if (!strcasecmp(optval, "atan")) {
 503                                         /* atan-shape transformation;
 504                                          * pumps up low values. */
 505                                         pp->assess_transform = EAT_ATAN;
 506                                 } else if (!strcasecmp(optval, "sigmoid")) {
 507                                         /* Sigmoid transformation
 508                                          * according to assess_sigmb. */
 509                                         pp->assess_transform = EAT_SIGMOID;
 510                                 } else {
 511                                         fprintf(stderr, "playout-elo: Invalid eval mode %s\n", optval);
 512                                         exit(1);
 513                                 }
 514                         } else {
 515                                 fprintf(stderr, "playout-elo: Invalid policy argument %s or missing value\n", optname);
 516                                 exit(1);
 517                         }
 518                 }
 519         }
 520
 521         pc.spat_dict = spatial_dict_init(false);
 522
 523         /* In playouts, we need to operate with much smaller set of features
 524          * in order to keep reasonable speed. */
 525         /* TODO: Configurable. */ /* TODO: Tune. */
 526         pp->choose.pc = FAST_PATTERN_CONFIG;
 527         pp->choose.pc.spat_dict = pc.spat_dict;
 528         char cgammafile[256]; strcpy(stpcpy(cgammafile, gammafile), "f");
 529         pp->choose.fg = features_gamma_init(&pp->choose.pc, cgammafile);
 530         memcpy(pp->choose.ps, PATTERN_SPEC_MATCHFAST, sizeof(pattern_spec));
 531         for (int i = 0; i < FEAT_MAX; i++)
 532                 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
 533                         pp->choose.ps[i] = 0;
 534         if (precise_selfatari) {
 535                 pp->choose.ps[FEAT_SELFATARI] &= ~(1<<PF_SELFATARI_STUPID);
 536                 pp->choose.ps[FEAT_SELFATARI] |= (1<<PF_SELFATARI_SMART);
 537         }
 538         board_gamma_set(b, pp->choose.fg, precise_selfatari);
 539
 540         if (pp->assess_fastpat) {
 541                 pp->assess = pp->choose;
 542                 pp->assess.fg = features_gamma_init(&pp->assess.pc, cgammafile);
 543         } else {
 544                 /* More detailed set of features. */
 545                 pp->assess.pc = pc;
 546                 pp->assess.fg = features_gamma_init(&pp->assess.pc, gammafile);
 547                 memcpy(pp->assess.ps, PATTERN_SPEC_MATCHALL, sizeof(pattern_spec));
 548                 for (int i = 0; i < FEAT_MAX; i++)
 549                         if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
 550                                 pp->assess.ps[i] = 0;
 551         }
 552
 553         return p;
 554 }