1 /* Playout player based on probability distribution generated over
2 * the available moves. */
4 /* We use the ELO-based (Coulom, 2007) approach, where each board
5 * feature (matched pattern, self-atari, capture, MC owner?, ...)
6 * is pre-assigned "playing strength" (gamma).
8 * Then, the problem of choosing a move is basically a team
9 * competition in ELO terms - each spot is represented by a team
10 * of features appearing there; the team gamma is product of feature
11 * gammas. The team gammas make for a probability distribution of
14 * We use the general pattern classifier that will find the features
15 * for us, and external datasets that can be harvested from a set
16 * of game records (see the HACKING file for details): patterns.spat
17 * as a dictionary of spatial stone configurations, and patterns.gamma
18 * with strengths of particular features. */
29 #include "patternsp.h"
31 #include "playout/elo.h"
34 #include "uct/prior.h"
36 #define PLDEBUGL(n) DEBUGL_(p->debug_level, n)
39 /* Note that the context can be shared by multiple threads! */
43 struct pattern_config pc
;
44 struct features_gamma
*fg
;
49 struct patternset choose
, assess
;
50 playout_elo_callbackp callback
; void *callback_data
;
54 /* This is the core of the policy - initializes and constructs the
55 * probability distribution over the move candidates. */
58 elo_get_probdist(struct playout_policy
*p
, struct patternset
*ps
, struct board
*b
, enum stone to_play
, struct probdist
*pd
)
60 //struct elo_policy *pp = p->data;
63 /* First, assign per-point probabilities. */
65 for (int f
= 0; f
< b
->flen
; f
++) {
66 struct move m
= { .coord
= b
->f
[f
], .color
= to_play
};
68 /* Skip pass (for now)? */
69 if (is_pass(m
.coord
)) {
71 probdist_set(pd
, m
.coord
, 0);
74 //fprintf(stderr, "<%d> %s\n", f, coord2sstr(m.coord, b));
76 /* Skip invalid moves. */
77 if (!board_is_valid_move(b
, &m
))
80 /* We shall never fill our own single-point eyes. */
81 /* XXX: In some rare situations, this prunes the best move:
82 * Bulk-five nakade with eye at 1-1 point. */
83 if (board_is_one_point_eye(b
, m
.coord
, to_play
)) {
88 /* Each valid move starts with gamma 1. */
91 /* Some easy features: */
92 /* XXX: We just disable them for now since we call the
93 * pattern matcher; you need the gammas file. */
95 if (is_bad_selfatari(b
, to_play
, m
.coord
))
99 /* Match pattern features: */
101 pattern_match(&ps
->pc
, ps
->ps
, &p
, b
, &m
);
102 for (int i
= 0; i
< p
.n
; i
++) {
103 /* Multiply together gammas of all pattern features. */
104 double gamma
= feature_gamma(ps
->fg
, &p
.f
[i
], NULL
);
105 //char buf[256] = ""; feature2str(buf, &p.f[i]);
106 //fprintf(stderr, "<%d> %s feat %s gamma %f\n", f, coord2sstr(m.coord, b), buf, gamma);
110 probdist_set(pd
, m
.coord
, g
);
111 //fprintf(stderr, "<%d> %s %f (E %f)\n", f, coord2sstr(m.coord, b), probdist_one(pd, m.coord), pd->items[f]);
121 coord_t coords
[LPD_MAX
];
122 double items
[LPD_MAX
];
129 elo_check_probdist(struct playout_policy
*p
, struct board
*b
, enum stone to_play
, struct probdist
*pd
, int *ignores
, struct lprobdist
*lpd
, coord_t lc
)
132 struct elo_policy
*pp
= p
->data
;
133 if (pd
->total
< PROBDIST_EPSILON
)
136 /* Compare to the manually created distribution. */
137 /* XXX: This is now broken if callback is used. */
139 double pdi
[board_size2(b
)]; memset(pdi
, 0, sizeof(pdi
));
140 struct probdist pdx
= { .n
= board_size2(b
), .items
= pdi
, .total
= 0 };
141 elo_get_probdist(p
, &pp
->choose
, b
, to_play
, &pdx
);
142 for (int i
= 0; i
< b
->flen
; i
++) {
144 if (is_pass(c
)) continue;
145 // XXX: Hardcoded ignores[] structure
146 if (ignores
[0] == c
) continue;
147 double val
= pd
->items
[c
];
148 if (!is_pass(lc
) && coord_is_8adjecent(lc
, c
, b
))
149 for (int j
= 0; j
< lpd
->n
; j
++)
150 if (lpd
->coords
[j
] == c
)
152 if (fabs(pdx
.items
[c
] - val
) < PROBDIST_EPSILON
)
154 printf("[%s %d] manual %f board %f ", coord2sstr(c
, b
), b
->pat3
[c
], pdx
.items
[c
], pd
->items
[c
]);
155 board_gamma_update(b
, c
, to_play
);
156 printf("plainboard %f\n", pd
->items
[c
]);
163 playout_elo_choose(struct playout_policy
*p
, struct board
*b
, enum stone to_play
)
165 struct elo_policy
*pp
= p
->data
;
166 /* The base board probdist. */
167 struct probdist
*pd
= &b
->prob
[to_play
- 1];
168 double pd_total
= pd
->total
; // precision backup
169 /* The list of moves we do not consider in pd. */
170 int ignores
[10]; int ignores_n
= 0;
171 /* The list of local moves; we consider these separately. */
172 struct lprobdist lpd
= { .n
= 0, .total
= 0 };
174 /* The engine might want to adjust our probdist. */
176 pp
->callback(pp
->callback_data
, b
, to_play
, pd
);
178 /* Make sure ko-prohibited move does not get picked. */
179 if (!is_pass(b
->ko
.coord
)) {
180 assert(b
->ko
.color
== to_play
);
181 ignores
[ignores_n
++] = b
->ko
.coord
;
182 probdist_mute(pd
, b
->ko
.coord
);
185 /* Contiguity detection. */
186 if (!is_pass(b
->last_move
.coord
)) {
187 foreach_8neighbor(b
, b
->last_move
.coord
) {
188 ignores
[ignores_n
++] = c
;
189 probdist_mute(pd
, c
);
191 double val
= probdist_one(pd
, c
) * b
->gamma
->gamma
[FEAT_CONTIGUITY
][1];
192 lpd
.coords
[lpd
.n
] = c
;
193 lpd
.items
[lpd
.n
++] = val
;
195 } foreach_8neighbor_end
;
198 ignores
[ignores_n
] = 0;
200 /* Verify sanity, possibly. */
201 elo_check_probdist(p
, b
, to_play
, pd
, ignores
, &lpd
, b
->last_move
.coord
);
205 double stab
= fast_frandom() * (lpd
.total
+ pd
->total
);
206 if (stab
< lpd
.total
- PROBDIST_EPSILON
) {
207 /* Local probdist. */
208 for (int i
= 0; i
< lpd
.n
; i
++) {
209 if (stab
<= lpd
.items
[i
]) {
213 stab
-= lpd
.items
[i
];
216 fprintf(stderr
, "elo: local overstab [%lf]\n", stab
);
220 } else if (pd
->total
>= PROBDIST_EPSILON
) {
221 /* Global probdist. */
222 /* XXX: We re-stab inside. */
223 c
= probdist_pick(pd
, ignores
);
229 /* Repair the damage. */
231 /* XXX: Do something less horribly inefficient
232 * than just recomputing the whole pd. */
234 for (int i
= 0; i
< b
->flen
; i
++) {
235 pd
->items
[b
->f
[i
]] = 0;
236 board_gamma_update(b
, b
->f
[i
], to_play
);
238 assert(fabs(pd
->total
- pd_total
) < PROBDIST_EPSILON
);
241 pd
->total
= pd_total
;
249 playout_elo_choose(struct playout_policy
*p
, struct board
*b
, enum stone to_play
)
251 struct elo_policy
*pp
= p
->data
;
252 double pdi
[board_size2(b
)]; memset(pdi
, 0, sizeof(pdi
));
253 struct probdist pd
= { .n
= board_size2(b
), .items
= pdi
, .total
= 0 };
254 elo_get_probdist(p
, &pp
->choose
, b
, to_play
, &pd
);
256 pp
->callback(pp
->callback_data
, b
, to_play
, &pd
);
257 if (pd
.total
< PROBDIST_EPSILON
)
259 int ignores
[1] = { 0 };
260 coord_t c
= probdist_pick(&pd
, ignores
);
267 playout_elo_assess(struct playout_policy
*p
, struct prior_map
*map
, int games
)
269 struct elo_policy
*pp
= p
->data
;
270 double pdi
[board_size2(map
->b
)]; memset(pdi
, 0, sizeof(pdi
));
271 struct probdist pd
= { .n
= board_size2(map
->b
), .items
= pdi
, .total
= 0 };
274 moves
= elo_get_probdist(p
, &pp
->assess
, map
->b
, map
->to_play
, &pd
);
276 /* It is a question how to transform the gamma to won games; we use
277 * a naive approach currently, but not sure how well it works. */
278 /* TODO: Try sqrt(p), atan(p)/pi*2. */
280 for (int f
= 0; f
< map
->b
->flen
; f
++) {
281 coord_t c
= map
->b
->f
[f
];
282 if (!map
->consider
[c
])
284 add_prior_value(map
, c
, probdist_one(&pd
, c
) / probdist_total(&pd
), games
);
289 playout_elo_done(struct playout_policy
*p
)
291 struct elo_policy
*pp
= p
->data
;
292 features_gamma_done(pp
->choose
.fg
);
293 features_gamma_done(pp
->assess
.fg
);
298 playout_elo_callback(struct playout_policy
*p
, playout_elo_callbackp callback
, void *data
)
300 struct elo_policy
*pp
= p
->data
;
301 pp
->callback
= callback
;
302 pp
->callback_data
= data
;
305 struct playout_policy
*
306 playout_elo_init(char *arg
, struct board
*b
)
308 struct playout_policy
*p
= calloc2(1, sizeof(*p
));
309 struct elo_policy
*pp
= calloc2(1, sizeof(*pp
));
311 p
->choose
= playout_elo_choose
;
312 p
->assess
= playout_elo_assess
;
313 p
->done
= playout_elo_done
;
315 const char *gammafile
= features_gamma_filename
;
316 /* Some defaults based on the table in Remi Coulom's paper. */
317 pp
->selfatari
= 0.06;
319 struct pattern_config pc
= DEFAULT_PATTERN_CONFIG
;
321 bool precise_selfatari
= false;
324 char *optspec
, *next
= arg
;
327 next
+= strcspn(next
, ":");
328 if (*next
) { *next
++ = 0; } else { *next
= 0; }
330 char *optname
= optspec
;
331 char *optval
= strchr(optspec
, '=');
332 if (optval
) *optval
++ = 0;
334 if (!strcasecmp(optname
, "selfatari") && optval
) {
335 pp
->selfatari
= atof(optval
);
336 } else if (!strcasecmp(optname
, "precisesa")) {
337 /* Use precise self-atari detection within
339 precise_selfatari
= !optval
|| atoi(optval
);
340 } else if (!strcasecmp(optname
, "gammafile") && optval
) {
341 /* patterns.gamma by default. We use this,
342 * and need also ${gammafile}f (e.g.
343 * patterns.gammaf) for fast (MC) features. */
344 gammafile
= strdup(optval
);
345 } else if (!strcasecmp(optname
, "xspat") && optval
) {
346 /* xspat==0: don't match spatial features
347 * xspat==1: match *only* spatial features */
348 xspat
= atoi(optval
);
350 fprintf(stderr
, "playout-elo: Invalid policy argument %s or missing value\n", optname
);
356 pc
.spat_dict
= spatial_dict_init(false);
359 pp
->assess
.fg
= features_gamma_init(&pp
->assess
.pc
, gammafile
);
360 memcpy(pp
->assess
.ps
, PATTERN_SPEC_MATCHALL
, sizeof(pattern_spec
));
361 for (int i
= 0; i
< FEAT_MAX
; i
++)
362 if ((xspat
== 0 && i
== FEAT_SPATIAL
) || (xspat
== 1 && i
!= FEAT_SPATIAL
))
363 pp
->assess
.ps
[i
] = 0;
365 /* In playouts, we need to operate with much smaller set of features
366 * in order to keep reasonable speed. */
367 /* TODO: Configurable. */ /* TODO: Tune. */
368 pp
->choose
.pc
= FAST_PATTERN_CONFIG
;
369 pp
->choose
.pc
.spat_dict
= pc
.spat_dict
;
370 char cgammafile
[256]; strcpy(stpcpy(cgammafile
, gammafile
), "f");
371 pp
->choose
.fg
= features_gamma_init(&pp
->choose
.pc
, cgammafile
);
372 memcpy(pp
->choose
.ps
, PATTERN_SPEC_MATCHFAST
, sizeof(pattern_spec
));
373 for (int i
= 0; i
< FEAT_MAX
; i
++)
374 if ((xspat
== 0 && i
== FEAT_SPATIAL
) || (xspat
== 1 && i
!= FEAT_SPATIAL
))
375 pp
->choose
.ps
[i
] = 0;
376 if (precise_selfatari
)
377 pp
->choose
.ps
[FEAT_SELFATARI
] = ~(1<<PF_SELFATARI_STUPID
);
378 board_gamma_set(b
, pp
->choose
.fg
, precise_selfatari
);