Introduce Joseki Scanner aux engine
[pachi.git] / uct / prior.c
blob41ab638ff63d5ca626eb010d20dabefb083382da
1 #include <assert.h>
2 #include <math.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
7 #include "board.h"
8 #include "debug.h"
9 #include "move.h"
10 #include "random.h"
11 #include "tactics.h"
12 #include "uct/internal.h"
13 #include "uct/plugins.h"
14 #include "uct/prior.h"
15 #include "uct/tree.h"
17 /* Applying heuristic values to the tree nodes, skewing the reading in
18 * most interesting directions. */
20 /* TODO: Introduce foreach_fpoint() to iterate only over non-occupied
21 * positions. */
23 struct uct_prior {
24 /* Equivalent experience for prior knowledge. MoGo paper recommends
25 * 50 playouts per source; in practice, esp. with RAVE, about 6
26 * playouts per source seems best. */
27 int eqex;
28 int even_eqex, policy_eqex, b19_eqex, eye_eqex, ko_eqex, plugin_eqex;
29 int cfgdn; int *cfgd_eqex;
32 void
33 uct_prior_even(struct uct *u, struct tree_node *node, struct prior_map *map)
35 /* Q_{even} */
36 /* This may be dubious for normal UCB1 but is essential for
37 * reading stability of RAVE, it appears. */
38 add_prior_value(map, pass, 0.5, u->prior->even_eqex);
39 foreach_free_point(map->b) {
40 if (!map->consider[c])
41 continue;
42 add_prior_value(map, c, 0.5, u->prior->even_eqex);
43 } foreach_free_point_end;
46 void
47 uct_prior_eye(struct uct *u, struct tree_node *node, struct prior_map *map)
49 /* Discourage playing into our own eyes. However, we cannot
50 * completely prohibit it:
51 * #######
52 * ...XX.#
53 * XOOOXX#
54 * X.OOOO#
55 * .XXXX.# */
56 foreach_free_point(map->b) {
57 if (!map->consider[c])
58 continue;
59 if (!board_is_one_point_eye(map->b, c, map->to_play))
60 continue;
61 add_prior_value(map, c, 0, u->prior->eye_eqex);
62 } foreach_free_point_end;
65 void
66 uct_prior_ko(struct uct *u, struct tree_node *node, struct prior_map *map)
68 /* Favor fighting ko, if we took it le 10 moves ago. */
69 coord_t ko = map->b->last_ko.coord;
70 if (is_pass(ko) || map->b->moves - map->b->last_ko_age > 10 || !map->consider[ko])
71 return;
72 // fprintf(stderr, "prior ko-fight @ %s %s\n", stone2str(map->to_play), coord2sstr(ko, map->b));
73 add_prior_value(map, ko, 1, u->prior->ko_eqex);
76 void
77 uct_prior_b19(struct uct *u, struct tree_node *node, struct prior_map *map)
79 /* Q_{b19} */
80 /* Specific hints for 19x19 board - priors for certain edge distances. */
81 foreach_free_point(map->b) {
82 if (!map->consider[c])
83 continue;
84 int d = coord_edge_distance(c, map->b);
85 if (d != 0 && d != 2)
86 continue;
87 /* The bonus applies only with no stones in immediate
88 * vincinity. */
89 if (board_stone_radar(map->b, c, 2))
90 continue;
91 /* First line: 0 */
92 /* Third line: 1 */
93 add_prior_value(map, c, d == 2, u->prior->b19_eqex);
94 } foreach_free_point_end;
97 void
98 uct_prior_playout(struct uct *u, struct tree_node *node, struct prior_map *map)
100 /* Q_{playout-policy} */
101 if (u->playout->assess)
102 u->playout->assess(u->playout, map, u->prior->policy_eqex);
105 void
106 uct_prior_cfgd(struct uct *u, struct tree_node *node, struct prior_map *map)
108 /* Q_{common_fate_graph_distance} */
109 /* Give bonus to moves local to the last move, where "local" means
110 * local in terms of groups, not just manhattan distance. */
111 if (is_pass(map->b->last_move.coord) || is_resign(map->b->last_move.coord))
112 return;
114 foreach_free_point(map->b) {
115 if (!map->consider[c])
116 continue;
117 if (map->distances[c] > u->prior->cfgdn)
118 continue;
119 assert(map->distances[c] != 0);
120 int bonus = u->prior->cfgd_eqex[map->distances[c]];
121 add_prior_value(map, c, 1, bonus);
122 } foreach_free_point_end;
125 void
126 uct_prior(struct uct *u, struct tree_node *node, struct prior_map *map)
128 if (u->prior->even_eqex)
129 uct_prior_even(u, node, map);
130 if (u->prior->eye_eqex)
131 uct_prior_eye(u, node, map);
132 if (u->prior->ko_eqex)
133 uct_prior_ko(u, node, map);
134 if (u->prior->b19_eqex)
135 uct_prior_b19(u, node, map);
136 if (u->prior->policy_eqex)
137 uct_prior_playout(u, node, map);
138 if (u->prior->cfgd_eqex)
139 uct_prior_cfgd(u, node, map);
140 if (u->prior->plugin_eqex)
141 plugin_prior(u->plugins, node, map, u->prior->plugin_eqex);
144 struct uct_prior *
145 uct_prior_init(char *arg, struct board *b)
147 struct uct_prior *p = calloc2(1, sizeof(struct uct_prior));
149 p->even_eqex = p->policy_eqex = p->b19_eqex = p->eye_eqex = p->ko_eqex = p->plugin_eqex = -1;
150 p->cfgdn = -1;
152 /* Even number! */
153 p->eqex = board_size(b)-2 >= 19 ? 20 : 14;
155 if (arg) {
156 char *optspec, *next = arg;
157 while (*next) {
158 optspec = next;
159 next += strcspn(next, ":");
160 if (*next) { *next++ = 0; } else { *next = 0; }
162 char *optname = optspec;
163 char *optval = strchr(optspec, '=');
164 if (optval) *optval++ = 0;
166 if (!strcasecmp(optname, "eqex") && optval) {
167 p->eqex = atoi(optval);
169 /* In the following settings, you can use -1 to
170 * set the prior to default eqex, or -2 to set
171 * it to the half of the default eqex. */
172 } else if (!strcasecmp(optname, "even") && optval) {
173 p->even_eqex = atoi(optval);
174 } else if (!strcasecmp(optname, "policy") && optval) {
175 p->policy_eqex = atoi(optval);
176 } else if (!strcasecmp(optname, "b19") && optval) {
177 p->b19_eqex = atoi(optval);
178 } else if (!strcasecmp(optname, "cfgd") && optval) {
179 /* cfgd=3%40%20%20 - 3 levels; immediate libs
180 * of last move => 40 wins, their neighbors
181 * 20 wins, 2nd-level neighbors 20 wins;
182 * neighbors are group-transitive. */
183 p->cfgdn = atoi(optval); optval += strcspn(optval, ":");
184 p->cfgd_eqex = calloc2(p->cfgdn + 1, sizeof(*p->cfgd_eqex));
185 p->cfgd_eqex[0] = 0;
186 for (int i = 1; *optval; i++, optval += strcspn(optval, ":")) {
187 optval++;
188 p->cfgd_eqex[i] = atoi(optval);
190 } else if (!strcasecmp(optname, "eye") && optval) {
191 p->eye_eqex = atoi(optval);
192 } else if (!strcasecmp(optname, "ko") && optval) {
193 p->ko_eqex = atoi(optval);
194 } else if (!strcasecmp(optname, "plugin") && optval) {
195 /* Unlike others, this is just a *recommendation*. */
196 p->plugin_eqex = atoi(optval);
197 } else {
198 fprintf(stderr, "uct: Invalid prior argument %s or missing value\n", optname);
199 exit(1);
204 if (p->even_eqex < 0) p->even_eqex = p->eqex / -p->even_eqex;
205 if (p->policy_eqex < 0) p->policy_eqex = p->eqex / -p->policy_eqex;
206 if (p->b19_eqex < 0) p->b19_eqex = p->eqex / -p->b19_eqex;
207 if (p->eye_eqex < 0) p->eye_eqex = p->eqex / -p->eye_eqex;
208 if (p->ko_eqex < 0) p->ko_eqex = p->eqex / -p->ko_eqex;
209 if (p->plugin_eqex < 0) p->plugin_eqex = p->eqex / -p->plugin_eqex;
211 if (p->cfgdn < 0) {
212 int bonuses[] = { 0, p->eqex, p->eqex / 2, p->eqex / 2 };
213 p->cfgdn = 3;
214 p->cfgd_eqex = calloc2(p->cfgdn + 1, sizeof(*p->cfgd_eqex));
215 memcpy(p->cfgd_eqex, bonuses, sizeof(bonuses));
217 if (p->cfgdn > TREE_NODE_D_MAX) {
218 fprintf(stderr, "uct: CFG distances only up to %d available\n", TREE_NODE_D_MAX);
219 exit(1);
222 return p;
225 void
226 uct_prior_done(struct uct_prior *p)
228 assert(p->cfgd_eqex);
229 free(p->cfgd_eqex);
230 free(p);