Moggy alwaysccaprate: Introduce, off by default
[pachi/peepo.git] / playout / elo.c
blob644ec67cf1b4013d63eb61d2da365f9e5aac2a8f
1 /* Playout player based on probability distribution generated over
2 * the available moves. */
4 /* We use the ELO-based (Coulom, 2007) approach, where each board
5 * feature (matched pattern, self-atari, capture, MC owner?, ...)
6 * is pre-assigned "playing strength" (gamma).
8 * Then, the problem of choosing a move is basically a team
9 * competition in ELO terms - each spot is represented by a team
10 * of features appearing there; the team gamma is product of feature
11 * gammas. The team gammas make for a probability distribution of
12 * moves to be played.
14 * We use the general pattern classifier that will find the features
15 * for us, and external datasets that can be harvested from a set
16 * of game records (see the HACKING file for details): patterns.spat
17 * as a dictionary of spatial stone configurations, and patterns.gamma
18 * with strengths of particular features. */
20 #include <assert.h>
21 #include <math.h>
22 #include <stdio.h>
23 #include <stdlib.h>
25 #define DEBUG
26 #include "board.h"
27 #include "debug.h"
28 #include "pattern.h"
29 #include "patternsp.h"
30 #include "playout.h"
31 #include "playout/elo.h"
32 #include "random.h"
33 #include "tactics.h"
34 #include "uct/prior.h"
36 #define PLDEBUGL(n) DEBUGL_(p->debug_level, n)
39 /* Note that the context can be shared by multiple threads! */
41 struct patternset {
42 pattern_spec ps;
43 struct pattern_config pc;
44 struct features_gamma *fg;
47 struct elo_policy {
48 float selfatari;
49 struct patternset choose, assess;
53 /* This is the core of the policy - initializes and constructs the
54 * probability distribution over the move candidates. */
56 int
57 elo_get_probdist(struct playout_policy *p, struct patternset *ps, struct board *b, enum stone to_play, struct probdist *pd)
59 //struct elo_policy *pp = p->data;
60 int moves = 0;
62 /* First, assign per-point probabilities. */
64 for (int f = 0; f < b->flen; f++) {
65 struct move m = { .coord = b->f[f], .color = to_play };
67 /* Skip pass (for now)? */
68 if (is_pass(m.coord)) {
69 skip_move:
70 probdist_set(pd, f, 0);
71 continue;
73 //fprintf(stderr, "<%d> %s\n", f, coord2sstr(m.coord, b));
75 /* Skip invalid moves. */
76 if (!board_is_valid_move(b, &m))
77 goto skip_move;
79 /* We shall never fill our own single-point eyes. */
80 /* XXX: In some rare situations, this prunes the best move:
81 * Bulk-five nakade with eye at 1-1 point. */
82 if (board_is_one_point_eye(b, &m.coord, to_play)) {
83 goto skip_move;
86 moves++;
87 /* Each valid move starts with gamma 1. */
88 float g = 1.f;
90 /* Some easy features: */
91 /* XXX: We just disable them for now since we call the
92 * pattern matcher; you need the gammas file. */
93 #if 0
94 if (is_bad_selfatari(b, to_play, m.coord))
95 g *= pp->selfatari;
96 #endif
98 /* Match pattern features: */
99 struct pattern p;
100 pattern_match(&ps->pc, ps->ps, &p, b, &m);
101 for (int i = 0; i < p.n; i++) {
102 /* Multiply together gammas of all pattern features. */
103 float gamma = feature_gamma(ps->fg, &p.f[i], NULL);
104 //char buf[256] = ""; feature2str(buf, &p.f[i]);
105 //fprintf(stderr, "<%d> %s feat %s gamma %f\n", f, coord2sstr(m.coord, b), buf, gamma);
106 g *= gamma;
109 probdist_set(pd, f, g);
110 //fprintf(stderr, "<%d> %s %f (E %f)\n", f, coord2sstr(m.coord, b), probdist_one(pd, f), pd->items[f]);
113 return moves;
117 coord_t
118 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
120 struct elo_policy *pp = p->data;
121 #ifdef BOARD_GAMMA
122 struct probdist *pd = &b->prob[to_play - 1];
123 /* Make sure ko-prohibited move does not get picked. */
124 if (!is_pass(b->ko.coord)) {
125 assert(b->ko.color == to_play);
126 probdist_set(pd, b->ko.coord, 0);
128 /* Contiguity detection. */
129 if (!is_pass(b->last_move.coord)) {
130 foreach_8neighbor(b, b->last_move.coord) {
131 probdist_set(pd, c, pd->items[c] * b->gamma->gamma[FEAT_CONTIGUITY][1]);
132 } foreach_8neighbor_end;
134 #if 0
135 /* Compare to the manually created distribution. */
136 if (pd->total >= PROBDIST_EPSILON) {
137 float pdi[b->flen]; memset(pdi, 0, sizeof(pdi));
138 struct probdist pdx = { .n = b->flen, .items = pdi, .total = 0 };
139 elo_get_probdist(p, &pp->choose, b, to_play, &pdx);
140 for (int i = 0; i < pdx.n; i++) {
141 if (is_pass(b->f[i])) continue;
142 if (fabs(pdx.items[i] - pd->items[b->f[i]]) >= PROBDIST_EPSILON) {
143 printf("[%s %d] manual %f board %f ", coord2sstr(b->f[i], b), b->pat3[b->f[i]], pdx.items[i], pd->items[b->f[i]]);
144 board_gamma_update(b, b->f[i], to_play);
145 printf("plainboard %f\n", pd->items[b->f[i]]);
146 assert(0);
150 #endif
151 /* Pick a move. */
152 coord_t c = pd->total >= PROBDIST_EPSILON ? probdist_pick(pd) : pass;
153 /* Repair the damage. */
154 if (!is_pass(b->ko.coord))
155 board_gamma_update(b, b->ko.coord, to_play);
156 if (!is_pass(b->last_move.coord)) {
157 foreach_8neighbor(b, b->last_move.coord) {
158 board_gamma_update(b, c, to_play);
159 } foreach_8neighbor_end;
161 return c;
163 #else
164 float pdi[b->flen]; memset(pdi, 0, sizeof(pdi));
165 struct probdist pd = { .n = b->flen, .items = pdi, .total = 0 };
166 elo_get_probdist(p, &pp->choose, b, to_play, &pd);
167 if (pd.total < PROBDIST_EPSILON)
168 return pass;
169 int f = probdist_pick(&pd);
170 return b->f[f];
171 #endif
174 void
175 playout_elo_assess(struct playout_policy *p, struct prior_map *map, int games)
177 struct elo_policy *pp = p->data;
178 float pdi[map->b->flen]; memset(pdi, 0, sizeof(pdi));
179 struct probdist pd = { .n = map->b->flen, .items = pdi, .total = 0 };
181 int moves;
182 moves = elo_get_probdist(p, &pp->assess, map->b, map->to_play, &pd);
184 /* It is a question how to transform the gamma to won games; we use
185 * a naive approach currently, but not sure how well it works. */
186 /* TODO: Try sqrt(p), atan(p)/pi*2. */
188 for (int f = 0; f < map->b->flen; f++) {
189 coord_t c = map->b->f[f];
190 if (!map->consider[c])
191 continue;
192 add_prior_value(map, c, probdist_one(&pd, f) / probdist_total(&pd), games);
196 void
197 playout_elo_done(struct playout_policy *p)
199 struct elo_policy *pp = p->data;
200 features_gamma_done(pp->choose.fg);
201 features_gamma_done(pp->assess.fg);
205 struct playout_policy *
206 playout_elo_init(char *arg, struct board *b)
208 struct playout_policy *p = calloc(1, sizeof(*p));
209 struct elo_policy *pp = calloc(1, sizeof(*pp));
210 p->data = pp;
211 p->choose = playout_elo_choose;
212 p->assess = playout_elo_assess;
213 p->done = playout_elo_done;
215 const char *gammafile = features_gamma_filename;
216 /* Some defaults based on the table in Remi Coulom's paper. */
217 pp->selfatari = 0.06;
219 struct pattern_config pc = DEFAULT_PATTERN_CONFIG;
220 int xspat = -1;
222 if (arg) {
223 char *optspec, *next = arg;
224 while (*next) {
225 optspec = next;
226 next += strcspn(next, ":");
227 if (*next) { *next++ = 0; } else { *next = 0; }
229 char *optname = optspec;
230 char *optval = strchr(optspec, '=');
231 if (optval) *optval++ = 0;
233 if (!strcasecmp(optname, "selfatari") && optval) {
234 pp->selfatari = atof(optval);
235 } else if (!strcasecmp(optname, "gammafile") && optval) {
236 /* patterns.gamma by default. We use this,
237 * and need also ${gammafile}f (e.g.
238 * patterns.gammaf) for fast (MC) features. */
239 gammafile = strdup(optval);
240 } else if (!strcasecmp(optname, "xspat") && optval) {
241 /* xspat==0: don't match spatial features
242 * xspat==1: match *only* spatial features */
243 xspat = atoi(optval);
244 } else {
245 fprintf(stderr, "playout-elo: Invalid policy argument %s or missing value\n", optname);
246 exit(1);
251 pc.spat_dict = spatial_dict_init(false);
253 pp->assess.pc = pc;
254 pp->assess.fg = features_gamma_init(&pp->assess.pc, gammafile);
255 memcpy(pp->assess.ps, PATTERN_SPEC_MATCHALL, sizeof(pattern_spec));
256 for (int i = 0; i < FEAT_MAX; i++)
257 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
258 pp->assess.ps[i] = 0;
260 /* In playouts, we need to operate with much smaller set of features
261 * in order to keep reasonable speed. */
262 /* TODO: Configurable. */ /* TODO: Tune. */
263 pp->choose.pc = FAST_PATTERN_CONFIG;
264 pp->choose.pc.spat_dict = pc.spat_dict;
265 char cgammafile[256]; strcpy(stpcpy(cgammafile, gammafile), "f");
266 pp->choose.fg = features_gamma_init(&pp->choose.pc, cgammafile);
267 memcpy(pp->choose.ps, PATTERN_SPEC_MATCHFAST, sizeof(pattern_spec));
268 for (int i = 0; i < FEAT_MAX; i++)
269 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
270 pp->choose.ps[i] = 0;
271 board_gamma_set(b, pp->choose.fg);
273 return p;