Merge branch 'libmap' into goals
[pachi.git] / libmap.h
blobc1f30ec0206349e912cd9e8b5e47338f57458a6a
1 #ifndef PACHI_LIBMAP_H
2 #define PACHI_LIBMAP_H
4 /* "Liberty map" - description of a particular liberty structure of a group.
5 * The idea is that we can track local tactical effectivity of various moves
6 * within the particular liberty structure context. */
8 #include <assert.h>
9 #include <stdbool.h>
11 #include "board.h"
12 #include "mq.h"
13 #include "stats.h"
15 #define LM_DEBUG if (0)
17 hash_t group_to_libmap(struct board *b, group_t group);
20 /* Setup of everything libmap-related. */
/* Global tuning knobs for the libmap tactical-move heuristic.
 * Defined once in the implementation file; configured via libmap_setup(). */
extern struct libmap_config {
	/* How to choose among queued candidate moves. */
	enum {
		LMP_THRESHOLD,	/* Random pick, biased away from low-rated moves. */
		LMP_UCB,	/* UCB bandit over tracked move statistics. */
	} pick_mode;

	/* LMP_THRESHOLD: */
	/* Preference for moves of tactical rating over this threshold
	 * (...or unrated moves). */
	floating_t pick_threshold;
	/* In given percentage of cases, pick move regardless of its
	 * tactical rating.*/
	int pick_epsilon;
	/* Whether to rather skip this heuristic altogether than play
	 * badly performing move. */
	bool avoid_bad;

	/* LMP_UCB: */
	/* Exploration coefficient for the bandit. */
	floating_t explore_p;
	/* Default prior for considered moves. */
	struct move_stats prior, tenuki_prior;

	/* Whether to merge records for the same move taking care
	 * of different groups within the move queue. */
	bool mq_merge_groups;
	/* When checking move X, defending group A by counter-attacking
	 * group B, whether to use A, B or A^B as liberty map.
	 * (Bit flags; may be OR'd together.) */
	enum {
		LMC_DEFENSE = 1,
		LMC_ATTACK = 2,
		LMC_DEFENSE_ATTACK = 4,
	} counterattack;
	/* Whether to evaluate based on local or global result. */
	enum {
		LME_LOCAL,
		LME_LVALUE,
		LME_GLOBAL,
	} eval;
	/* Whether to also try and track tenuki moves. */
	bool tenuki;
} libmap_config;
65 void libmap_setup(char *arg);
68 /* Our own version of move_queue, but including liberty maps of moves. */
69 /* The user will usually first create a queue of tactical goals and pick
70 * (using libmap_mq functions below), then add that one to libmap_hash's
71 * global move queue, processed at the end of the whole playout. */
struct libmap_group {
	/* Group-relative tactical description of a move. */
	/* The group this tactical goal concerns. */
	group_t group;
	/* Liberty-map hash of the group (presumably from group_to_libmap();
	 * TODO confirm against callers). */
	hash_t hash;
	/* Color whose interest we pursue for this group; if the group's
	 * stones are of this color we are defending it, otherwise
	 * attacking it (cf. the 'd'/'a' tag in libmap_mq_print()). */
	enum stone goal;
};
struct libmap_mq {
	struct move_queue mq;
	/* Parallel arrays, indexed in step with mq.move[]. */
	enum stone color[MQL]; // complements mq.move
	struct libmap_group group[MQL];
};
86 /* libmap_mq_pick() would be simple fast_random(mq.moves), but c.f.
87 * libmap_queue_mqpick() below. */
88 static void libmap_mq_add(struct libmap_mq *q, struct move m, unsigned char tag, struct libmap_group group);
89 static void libmap_mq_nodup(struct libmap_mq *q);
90 static void libmap_mq_print(struct libmap_mq *q, struct board *b, char *label);
93 /* Tactical application - hash structure storing info about move effectivity. */
/* One tracked move and its accumulated win/loss statistics. */
struct libmap_move {
	struct move move;
	struct move_stats stats;
};
/* One hash-table bucket: statistics of all tracked moves within
 * a single liberty-map context. */
struct libmap_context {
	/* Full hash of the liberty map stored here (for probe verification). */
	hash_t hash;
	/* Total playouts recorded through this context. */
	int visits;
	/* We add moves in multiple threads. But at most, on conflict we will
	 * end up with tiny amount of misappropriated playouts. */
	/* Number of used entries in move[]; 0 means the bucket is free. */
	int moves;
	struct libmap_move move[GROUP_REFILL_LIBS];
};
struct libmap_hash {
	struct board *b;
	/* Multiple board instances may share the same libmap hash;
	 * on board_copy(), libmap is shared by default, so that all
	 * playouts reuse libmap of the master board. refcount keeps
	 * track of all the libmap uses in multi-thread environment. */
	int refcount;

	/* Stored statistics. */
	/* We store statistics in a hash table without separated chains;
	 * if bucket is occupied, we look into the following ones,
	 * allowing up to libmap_hash_maxline subsequent checks. */
	/* XXX: We mishandle hashes >= UINT64_MAX - libmap_hash_maxline. */
#define libmap_hash_bits 19
#define libmap_hash_size (1 << libmap_hash_bits)
#define libmap_hash_mask (libmap_hash_size - 1)
#define libmap_hash_maxline 32
	struct libmap_context hash[libmap_hash_size];
};
129 /* Get a new libmap. */
130 struct libmap_hash *libmap_init(struct board *b);
131 /* Release libmap. Based on refcount, this will free it. */
132 void libmap_put(struct libmap_hash *lm);
134 /* Pick a move from @q, enqueue it in lmqueue and return its coordinate. */
135 static coord_t libmap_queue_mqpick(struct libmap_hash *lm, struct libmap_mq *lmqueue, struct libmap_mq *q);
136 /* Record queued moves in the hashtable based on final position of b and winner's color. */
137 void libmap_queue_process(struct libmap_hash *lm, struct libmap_mq *lmqueue, struct board *b, enum stone winner);
138 /* Add a result to the hashed statistics. */
139 void libmap_add_result(struct libmap_hash *lm, hash_t hash, struct move move, floating_t result, int playouts);
140 /* Get libmap context of a given group. */
141 static struct libmap_context *libmap_group_context(struct libmap_hash *lm, hash_t hash);
142 /* Get statistics of particular move in given libmap structure. */
143 static struct move_stats *libmap_move_stats(struct libmap_hash *lm, hash_t hash, struct move move);
144 /* Get statistics of particular move on given board. */
145 /* (Note that this is inherently imperfect as it does not take into account
146 * counter-atari moves.) */
147 struct move_stats libmap_board_move_stats(struct libmap_hash *lm, struct board *b, struct move move);
151 static inline void
152 libmap_mq_add(struct libmap_mq *q, struct move m, unsigned char tag, struct libmap_group group)
154 assert(q->mq.moves < MQL);
155 q->mq.tag[q->mq.moves] = tag;
156 q->mq.move[q->mq.moves] = m.coord;
157 q->color[q->mq.moves] = m.color;
158 q->group[q->mq.moves] = group;
159 q->mq.moves++;
static inline void
libmap_mq_nodup(struct libmap_mq *q)
{
	/* Deduplicate the most recently added move: if it repeats one of
	 * the up-to-three entries just before it, merge it into the older
	 * entry (OR the tags together) and shrink the queue by one. */
	for (unsigned int i = 1; i < 4; i++) {
		if (q->mq.moves <= i)
			return;
		/* Same coordinate, and - unless mq_merge_groups allows
		 * merging across groups - the same group context too. */
		if (q->mq.move[q->mq.moves - 1 - i] == q->mq.move[q->mq.moves - 1]
		    && (libmap_config.mq_merge_groups
			|| (q->group[q->mq.moves - 1 - i].group == q->group[q->mq.moves - 1].group
			    && q->group[q->mq.moves - 1 - i].hash == q->group[q->mq.moves - 1].hash
			    && q->group[q->mq.moves - 1 - i].goal == q->group[q->mq.moves - 1].goal))) {
			q->mq.tag[q->mq.moves - 1 - i] |= q->mq.tag[q->mq.moves - 1];
			/* A duplicated coordinate is expected to carry the
			 * same color; anything else is a caller bug. */
			assert(q->color[q->mq.moves - 1 - i] == q->color[q->mq.moves - 1]);
			q->mq.moves--;
			return;
		}
	}
}
static inline void
libmap_mq_print(struct libmap_mq *q, struct board *b, char *label)
{
	/* Debug dump of the queue to stderr: for each move, print its
	 * coordinate, whether it defends ('d') or attacks ('a') its group,
	 * the group coordinate, the (masked) libmap hash, and any stored
	 * statistics as (value/playouts). */
	fprintf(stderr, "%s candidate moves: ", label);
	for (unsigned int i = 0; i < q->mq.moves; i++) {
		fprintf(stderr, "%s[%c:%s %"PRIhash"]", coord2sstr(q->mq.move[i], b),
			/* attacker / defender */
			board_at(b, q->group[i].group) == q->group[i].goal ? 'd' : 'a',
			coord2sstr(q->group[i].group, b),
			q->group[i].hash & libmap_hash_mask);
		struct move m = { .coord = q->mq.move[i], .color = q->color[i] };
		struct move_stats *ms = libmap_move_stats(b->libmap, q->group[i].hash, m);
		if (ms) {
			fprintf(stderr, "(%.3f/%d)", ms->value, ms->playouts);
		}
		fputc(' ', stderr);
	}
	fputc('\n', stderr);
}
202 static inline int
203 libmap_queue_mqpick_threshold(struct libmap_hash *lm, struct libmap_mq *q)
205 /* Pick random move, up to a simple check - if a move has tactical
206 * rating lower than threshold, prefer another. */
207 int p = fast_random(q->mq.moves);
208 if (fast_random(100) < libmap_config.pick_epsilon)
209 return p;
211 bool found = false;
212 unsigned int pp = p;
213 do {
214 int pm = p % q->mq.moves;
215 struct move m = { .coord = q->mq.move[pm], .color = q->color[pm] };
216 struct move_stats *ms = libmap_move_stats(lm, q->group[pm].hash, m);
217 if (!ms || ms->value >= libmap_config.pick_threshold) {
218 found = true;
219 break;
221 } while (++p % q->mq.moves < pp);
222 p %= q->mq.moves;
223 if (!found && libmap_config.avoid_bad)
224 return -1;
225 return p;
/* LMP_UCB picker: rate every queued move by a UCB-style urgency
 * (mean value + exploration bonus) and return the index of the best,
 * breaking ties at random. */
static inline int
libmap_queue_mqpick_ucb(struct libmap_hash *lm, struct libmap_mq *q)
{
	/* NOTE(review): best_pa[0] stays uninitialized if no urgency ever
	 * exceeds -9999 (e.g. empty queue, or NaN urgencies); callers are
	 * presumed to guarantee q->mq.moves > 0 - confirm. */
	int best_pa[BOARD_MAX_MOVES + 1], best_pn = 1;
	floating_t best_urgency = -9999;
	LM_DEBUG fprintf(stderr, "\tBandit: ");

	for (unsigned int p = 0; p < q->mq.moves; p++) {
		struct libmap_context *lc = libmap_group_context(lm, q->group[p].hash);

		/* TODO: Consider all moves of this group,
		 * not just mq contents. */
		struct move m = { .coord = q->mq.move[p], .color = q->color[p] };
		/* Seed the statistics with the configured prior; tenuki
		 * (pass) moves get their own prior. */
		struct move_stats s = !is_pass(m.coord) ? libmap_config.prior : libmap_config.tenuki_prior;
		int group_visits = (lc ? lc->visits : 0) + s.playouts;
		struct move_stats *ms = libmap_move_stats(lm, q->group[p].hash, m);
		if (ms) stats_merge(&s, ms);

		/* UCB1-style urgency: exploitation term plus exploration
		 * bonus scaled by explore_p. */
		floating_t urgency = s.value + libmap_config.explore_p * sqrt(log(group_visits) / s.playouts);
		LM_DEBUG fprintf(stderr, "%s[%.3f=%.3fx(%d/%d)] ", coord2sstr(m.coord, lm->b), urgency, s.value, group_visits, s.playouts);
		if (urgency > best_urgency) {
			/* New strict best; restart the tie list. */
			best_pn = 1;
			best_pa[0] = (int) p;
			best_urgency = urgency;
		} else if (urgency == best_urgency) {
			/* Exact tie; remember it for random tie-breaking. */
			best_pa[best_pn++] = (int) p;
		}
	}

	int best_p = best_pa[fast_random(best_pn)];
	assert(best_p >= 0);
	LM_DEBUG fprintf(stderr, "\t=[%d]> %s\n", best_pn, coord2sstr(q->mq.move[best_p], lm->b));
	return best_p;
}
264 static inline coord_t
265 libmap_queue_mqpick(struct libmap_hash *lm, struct libmap_mq *lmqueue, struct libmap_mq *q)
267 if (!q->mq.moves)
268 return pass; // nothing to do
270 /* Create a list of groups involved in the MQ. */
271 struct libmap_group *groups[MQL];
272 unsigned int groups_n = 0;
273 if (libmap_config.tenuki) {
274 for (unsigned int i = 0; i < q->mq.moves; i++) {
275 for (unsigned int j = 0; j < groups_n; j++)
276 if (q->group[i].hash == groups[j]->hash)
277 goto g_next_move;
278 groups[groups_n++] = &q->group[i];
279 g_next_move:;
283 /* Add tenuki move for each libmap group to the list of candidates. */
284 /* XXX: Can the color vary within the queue? */
285 if (libmap_config.tenuki) {
286 struct move tenuki = { .coord = pass, .color = q->color[0] };
287 for (unsigned int i = 0; i < groups_n; i++)
288 libmap_mq_add(q, tenuki, 0 /* XXX */, *groups[i]);
291 unsigned int p = 0;
292 if (q->mq.moves > 1) {
293 if (lm) {
294 switch (libmap_config.pick_mode) {
295 case LMP_THRESHOLD:
296 p = libmap_queue_mqpick_threshold(lm, q);
297 break;
298 case LMP_UCB:
299 p = libmap_queue_mqpick_ucb(lm, q);
300 break;
302 } else {
303 p = fast_random(q->mq.moves);
306 if (p < 0)
307 return pass;
309 if (lm) {
310 struct move m = { .coord = q->mq.move[p], .color = q->color[p] };
311 libmap_mq_add(lmqueue, m, q->mq.tag[p], q->group[p]);
314 return q->mq.move[p];
static inline struct libmap_context *
libmap_group_context(struct libmap_hash *lm, hash_t hash)
{
	/* Open-addressed lookup with linear probing: scan forward from the
	 * home bucket until the stored full hash matches. Gives up on an
	 * empty bucket (moves == 0) or after libmap_hash_maxline probes.
	 * (The XXX near libmap_hash_maxline notes the overflow caveat for
	 * hashes close to UINT64_MAX.) */
	hash_t ih;
	for (ih = hash; lm->hash[ih & libmap_hash_mask].hash != hash; ih++) {
		if (lm->hash[ih & libmap_hash_mask].moves == 0)
			return NULL;
		if (ih >= hash + libmap_hash_maxline)
			return NULL;
	}
	return &lm->hash[ih & libmap_hash_mask];
}
330 static inline struct move_stats *
331 libmap_move_stats(struct libmap_hash *lm, hash_t hash, struct move move)
333 struct libmap_context *lc = libmap_group_context(lm, hash);
334 if (!lc) return NULL;
335 for (int i = 0; i < lc->moves; i++) {
336 if (lc->move[i].move.coord == move.coord
337 && lc->move[i].move.color == move.color)
338 return &lc->move[i].stats;
340 return NULL;
343 #endif