Elo: Add debug prints
[pachi.git] / playout / elo.c
blob8e1b7a9e732a80c22062ab3d3c9c00a97467d346
1 /* Playout player based on probability distribution generated over
2 * the available moves. */
4 /* We use the ELO-based (Coulom, 2007) approach, where each board
5 * feature (matched pattern, self-atari, capture, MC owner?, ...)
6 * is pre-assigned "playing strength" (gamma).
8 * Then, the problem of choosing a move is basically a team
9 * competition in ELO terms - each spot is represented by a team
10 * of features appearing there; the team gamma is product of feature
11 * gammas. The team gammas make for a probability distribution of
12 * moves to be played.
14 * We use the general pattern classifier that will find the features
15 * for us, and external datasets that can be harvested from a set
16 * of game records (see the HACKING file for details): patterns.spat
17 * as a dictionary of spatial stone configurations, and patterns.gamma
18 * with strengths of particular features. */
20 #include <assert.h>
21 #include <math.h>
22 #include <stdio.h>
23 #include <stdlib.h>
25 //#define DEBUG
26 #include "board.h"
27 #include "debug.h"
28 #include "pattern.h"
29 #include "patternsp.h"
30 #include "playout.h"
31 #include "playout/elo.h"
32 #include "random.h"
33 #include "tactics.h"
34 #include "uct/prior.h"
36 #define PLDEBUGL(n) DEBUGL_(p->debug_level, n)
39 /* Note that the context can be shared by multiple threads! */
41 struct patternset {
42 pattern_spec ps;
43 struct pattern_config pc;
44 struct features_gamma *fg;
47 struct elo_policy {
48 float selfatari;
49 struct patternset choose, assess;
50 playout_elo_callbackp callback; void *callback_data;
54 /* This is the core of the policy - initializes and constructs the
55 * probability distribution over the move candidates. */
57 int
58 elo_get_probdist(struct playout_policy *p, struct patternset *ps, struct board *b, enum stone to_play, struct probdist *pd)
60 //struct elo_policy *pp = p->data;
61 int moves = 0;
63 /* First, assign per-point probabilities. */
65 for (int f = 0; f < b->flen; f++) {
66 struct move m = { .coord = b->f[f], .color = to_play };
68 /* Skip pass (for now)? */
69 if (is_pass(m.coord)) {
70 skip_move:
71 probdist_set(pd, m.coord, 0);
72 continue;
74 //fprintf(stderr, "<%d> %s\n", f, coord2sstr(m.coord, b));
76 /* Skip invalid moves. */
77 if (!board_is_valid_move(b, &m))
78 goto skip_move;
80 /* We shall never fill our own single-point eyes. */
81 /* XXX: In some rare situations, this prunes the best move:
82 * Bulk-five nakade with eye at 1-1 point. */
83 if (board_is_one_point_eye(b, m.coord, to_play)) {
84 goto skip_move;
87 moves++;
88 /* Each valid move starts with gamma 1. */
89 double g = 1.f;
91 /* Some easy features: */
92 /* XXX: We just disable them for now since we call the
93 * pattern matcher; you need the gammas file. */
94 #if 0
95 if (is_bad_selfatari(b, to_play, m.coord))
96 g *= pp->selfatari;
97 #endif
99 /* Match pattern features: */
100 struct pattern p;
101 pattern_match(&ps->pc, ps->ps, &p, b, &m);
102 for (int i = 0; i < p.n; i++) {
103 /* Multiply together gammas of all pattern features. */
104 double gamma = feature_gamma(ps->fg, &p.f[i], NULL);
105 //char buf[256] = ""; feature2str(buf, &p.f[i]);
106 //fprintf(stderr, "<%d> %s feat %s gamma %f\n", f, coord2sstr(m.coord, b), buf, gamma);
107 g *= gamma;
110 probdist_set(pd, m.coord, g);
111 //fprintf(stderr, "<%d> %s %f (E %f)\n", f, coord2sstr(m.coord, b), probdist_one(pd, m.coord), pd->items[f]);
114 return moves;
118 struct lprobdist {
119 int n;
120 #define LPD_MAX 8
121 coord_t coords[LPD_MAX];
122 double items[LPD_MAX];
123 double total;
125 /* Backups of original totals for restoring. */
126 double btotal;
127 double browtotals_v[10];
128 int browtotals_i[10];
129 int browtotals_n;
132 #ifdef BOARD_GAMMA
134 static void
135 elo_check_probdist(struct playout_policy *p, struct board *b, enum stone to_play, struct probdist *pd, int *ignores, struct lprobdist *lpd, coord_t lc)
137 #if 0
138 struct elo_policy *pp = p->data;
139 if (pd->total < PROBDIST_EPSILON)
140 return;
142 /* Compare to the manually created distribution. */
143 /* XXX: This is now broken if callback is used. */
145 probdist_alloca(pdx, b);
146 elo_get_probdist(p, &pp->choose, b, to_play, &pdx);
147 for (int i = 0; i < b->flen; i++) {
148 coord_t c = b->f[i];
149 if (is_pass(c)) continue;
150 // XXX: Hardcoded ignores[] structure
151 if (ignores[0] == c) continue;
152 double val = pd->items[c];
153 if (!is_pass(lc) && coord_is_8adjecent(lc, c, b))
154 for (int j = 0; j < lpd->n; j++)
155 if (lpd->coords[j] == c)
156 val = lpd->items[j];
157 if (fabs(pdx.items[c] - val) < PROBDIST_EPSILON)
158 continue;
159 printf("[%s %d] manual %f board %f ", coord2sstr(c, b), b->pat3[c], pdx.items[c], pd->items[c]);
160 board_gamma_update(b, c, to_play);
161 printf("plainboard %f\n", pd->items[c]);
162 assert(0);
164 #endif
167 coord_t
168 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
170 struct elo_policy *pp = p->data;
171 /* The base board probdist. */
172 struct probdist *pd = &b->prob[to_play - 1];
173 /* The list of moves we do not consider in pd. */
174 int ignores[10]; int ignores_n = 0;
175 /* The list of local moves; we consider these separately. */
176 struct lprobdist lpd = { .n = 0, .total = 0, .btotal = pd->total, .browtotals_n = 0 };
178 /* The engine might want to adjust our probdist. */
179 if (pp->callback)
180 pp->callback(pp->callback_data, b, to_play, pd);
182 if (PLDEBUGL(5)) {
183 board_print(b, stderr);
184 fprintf(stderr, "pd total pre %lf lpd %lf\n", pd->total, lpd.total);
187 #define ignore_move(c_) do { \
188 ignores[ignores_n++] = c_; \
189 if (ignores_n > 1 && ignores[ignores_n - 1] < ignores[ignores_n - 2]) { \
190 /* Keep ignores[] sorted. We abuse the fact that we know \
191 * only one item can be out-of-order. */ \
192 coord_t cc = ignores[ignores_n - 2]; \
193 ignores[ignores_n - 2] = ignores[ignores_n - 1]; \
194 ignores[ignores_n - 1] = cc; \
196 int rowi = coord_y(c_, pd->b); \
197 lpd.browtotals_i[lpd.browtotals_n] = rowi; \
198 lpd.browtotals_v[lpd.browtotals_n++] = pd->rowtotals[rowi]; \
199 probdist_mute(pd, c_); \
200 if (PLDEBUGL(6)) \
201 fprintf(stderr, "ignored move %s(%lf) => tot pd %lf lpd %lf\n", coord2sstr(c_, pd->b), pd->items[c_], pd->total, lpd.total); \
202 } while (0)
204 /* Make sure ko-prohibited move does not get picked. */
205 if (!is_pass(b->ko.coord)) {
206 assert(b->ko.color == to_play);
207 ignore_move(b->ko.coord);
210 /* Contiguity detection. */
211 if (!is_pass(b->last_move.coord)) {
212 foreach_8neighbor(b, b->last_move.coord) {
213 ignore_move(c);
215 double val = probdist_one(pd, c) * b->gamma->gamma[FEAT_CONTIGUITY][1];
216 lpd.coords[lpd.n] = c;
217 lpd.items[lpd.n++] = val;
218 lpd.total += val;
219 } foreach_8neighbor_end;
222 ignores[ignores_n] = pass;
223 if (PLDEBUGL(5))
224 fprintf(stderr, "pd total post %lf lpd %lf\n", pd->total, lpd.total);
226 /* Verify sanity, possibly. */
227 elo_check_probdist(p, b, to_play, pd, ignores, &lpd, b->last_move.coord);
229 /* Pick a move. */
230 coord_t c = pass;
231 double stab = fast_frandom() * (lpd.total + pd->total);
232 if (PLDEBUGL(5))
233 fprintf(stderr, "stab %lf / (%lf + %lf)\n", stab, lpd.total, pd->total);
234 if (stab < lpd.total - PROBDIST_EPSILON) {
235 /* Local probdist. */
236 if (PLDEBUGL(6)) {
237 /* Some debug prints. */
238 double tot = 0;
239 for (int i = 0; i < lpd.n; i++) {
240 tot += lpd.items[i];
241 struct pattern p;
242 struct move m = { .color = to_play, .coord = lpd.coords[i] };
243 pattern_match(&pp->choose.pc, pp->choose.ps, &p, b, &m);
244 char s[256] = ""; pattern2str(s, &p);
245 fprintf(stderr, "coord %s <%lf> [tot %lf] %s (p3:%d)\n", coord2sstr(lpd.coords[i], b), lpd.items[i], tot, s, pattern3_by_spatial(pp->choose.pc.spat_dict, b->pat3[lpd.coords[i]]));
248 for (int i = 0; i < lpd.n; i++) {
249 if (stab <= lpd.items[i]) {
250 c = lpd.coords[i];
251 break;
253 stab -= lpd.items[i];
255 if (is_pass(c)) {
256 fprintf(stderr, "elo: local overstab [%lf]\n", stab);
257 abort();
260 } else if (pd->total >= PROBDIST_EPSILON) {
261 /* Global probdist. */
262 /* XXX: We re-stab inside. */
263 c = probdist_pick(pd, ignores);
265 } else {
266 if (PLDEBUGL(5))
267 fprintf(stderr, "ding!\n");
268 c = pass;
271 /* Repair the damage. */
272 if (pp->callback) {
273 /* XXX: Do something less horribly inefficient
274 * than just recomputing the whole pd. */
275 pd->total = 0;
276 for (int i = 0; i < board_size(pd->b); i++)
277 pd->rowtotals[i] = 0;
278 for (int i = 0; i < b->flen; i++) {
279 pd->items[b->f[i]] = 0;
280 board_gamma_update(b, b->f[i], to_play);
282 assert(fabs(pd->total - lpd.btotal) < PROBDIST_EPSILON);
284 } else {
285 pd->total = lpd.btotal;
286 /* If we touched a row multiple times (and we sure will),
287 * the latter value is obsolete; but since we go through
288 * the backups in reverse order, all is good. */
289 for (int j = lpd.browtotals_n - 1; j >= 0; j--)
290 pd->rowtotals[lpd.browtotals_i[j]] = lpd.browtotals_v[j];
292 return c;
295 #else
297 coord_t
298 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
300 struct elo_policy *pp = p->data;
301 probdist_alloca(pd, b);
302 elo_get_probdist(p, &pp->choose, b, to_play, &pd);
303 if (pp->callback)
304 pp->callback(pp->callback_data, b, to_play, &pd);
305 if (pd.total < PROBDIST_EPSILON)
306 return pass;
307 int ignores[1] = { pass };
308 coord_t c = probdist_pick(&pd, ignores);
309 return c;
312 #endif
314 void
315 playout_elo_assess(struct playout_policy *p, struct prior_map *map, int games)
317 struct elo_policy *pp = p->data;
318 probdist_alloca(pd, map->b);
320 int moves;
321 moves = elo_get_probdist(p, &pp->assess, map->b, map->to_play, &pd);
323 /* It is a question how to transform the gamma to won games; we use
324 * a naive approach currently, but not sure how well it works. */
325 /* TODO: Try sqrt(p), atan(p)/pi*2. */
327 for (int f = 0; f < map->b->flen; f++) {
328 coord_t c = map->b->f[f];
329 if (!map->consider[c])
330 continue;
331 add_prior_value(map, c, probdist_one(&pd, c) / probdist_total(&pd), games);
335 void
336 playout_elo_done(struct playout_policy *p)
338 struct elo_policy *pp = p->data;
339 features_gamma_done(pp->choose.fg);
340 features_gamma_done(pp->assess.fg);
344 void
345 playout_elo_callback(struct playout_policy *p, playout_elo_callbackp callback, void *data)
347 struct elo_policy *pp = p->data;
348 pp->callback = callback;
349 pp->callback_data = data;
352 struct playout_policy *
353 playout_elo_init(char *arg, struct board *b)
355 struct playout_policy *p = calloc2(1, sizeof(*p));
356 struct elo_policy *pp = calloc2(1, sizeof(*pp));
357 p->data = pp;
358 p->choose = playout_elo_choose;
359 p->assess = playout_elo_assess;
360 p->done = playout_elo_done;
362 const char *gammafile = features_gamma_filename;
363 /* Some defaults based on the table in Remi Coulom's paper. */
364 pp->selfatari = 0.06;
366 struct pattern_config pc = DEFAULT_PATTERN_CONFIG;
367 int xspat = -1;
368 bool precise_selfatari = false;
370 if (arg) {
371 char *optspec, *next = arg;
372 while (*next) {
373 optspec = next;
374 next += strcspn(next, ":");
375 if (*next) { *next++ = 0; } else { *next = 0; }
377 char *optname = optspec;
378 char *optval = strchr(optspec, '=');
379 if (optval) *optval++ = 0;
381 if (!strcasecmp(optname, "selfatari") && optval) {
382 pp->selfatari = atof(optval);
383 } else if (!strcasecmp(optname, "precisesa")) {
384 /* Use precise self-atari detection within
385 * fast patterns. */
386 precise_selfatari = !optval || atoi(optval);
387 } else if (!strcasecmp(optname, "gammafile") && optval) {
388 /* patterns.gamma by default. We use this,
389 * and need also ${gammafile}f (e.g.
390 * patterns.gammaf) for fast (MC) features. */
391 gammafile = strdup(optval);
392 } else if (!strcasecmp(optname, "xspat") && optval) {
393 /* xspat==0: don't match spatial features
394 * xspat==1: match *only* spatial features */
395 xspat = atoi(optval);
396 } else {
397 fprintf(stderr, "playout-elo: Invalid policy argument %s or missing value\n", optname);
398 exit(1);
403 pc.spat_dict = spatial_dict_init(false);
405 pp->assess.pc = pc;
406 pp->assess.fg = features_gamma_init(&pp->assess.pc, gammafile);
407 memcpy(pp->assess.ps, PATTERN_SPEC_MATCHALL, sizeof(pattern_spec));
408 for (int i = 0; i < FEAT_MAX; i++)
409 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
410 pp->assess.ps[i] = 0;
412 /* In playouts, we need to operate with much smaller set of features
413 * in order to keep reasonable speed. */
414 /* TODO: Configurable. */ /* TODO: Tune. */
415 pp->choose.pc = FAST_PATTERN_CONFIG;
416 pp->choose.pc.spat_dict = pc.spat_dict;
417 char cgammafile[256]; strcpy(stpcpy(cgammafile, gammafile), "f");
418 pp->choose.fg = features_gamma_init(&pp->choose.pc, cgammafile);
419 memcpy(pp->choose.ps, PATTERN_SPEC_MATCHFAST, sizeof(pattern_spec));
420 for (int i = 0; i < FEAT_MAX; i++)
421 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
422 pp->choose.ps[i] = 0;
423 if (precise_selfatari)
424 pp->choose.ps[FEAT_SELFATARI] = ~(1<<PF_SELFATARI_STUPID);
425 board_gamma_set(b, pp->choose.fg, precise_selfatari);
427 return p;