UCT Prior: Overhaul configuration - separate context, internal option parsing, all...
[pachi.git] / uct / prior.c
bloba1a42c46c77d7f7759b2407d1d8ba94661f924cf
1 #include <assert.h>
2 #include <math.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
7 #include "board.h"
8 #include "debug.h"
9 #include "move.h"
10 #include "random.h"
11 #include "tactics.h"
12 #include "uct/internal.h"
13 #include "uct/prior.h"
14 #include "uct/tree.h"
16 /* Applying heuristic values to the tree nodes, skewing the reading in
17 * most interesting directions. */
/* Weights ("equivalent experience", in playouts) of the individual
 * prior knowledge sources. */
struct uct_prior {
	/* Equivalent experience for prior knowledge. MoGo paper recommends
	 * 50 playouts per source; in practice, esp. with RAVE, about 6
	 * playouts per source seems best. */
	int eqex;

	/* Per-source weights. uct_prior() skips any source whose weight
	 * is zero. */
	int even_eqex;   /* uct_prior_even() */
	int gp_eqex;     /* uct_prior_grandparent() */
	int policy_eqex; /* uct_prior_playout() */
	int b19_eqex;    /* uct_prior_b19() */
	int cfgd_eqex;   /* uct_prior_cfgd() */
	int eye_eqex;    /* uct_prior_eye() */
	int ko_eqex;     /* uct_prior_ko() */
};
28 void
29 uct_prior_even(struct uct *u, struct tree_node *node, struct prior_map *map)
31 /* Q_{even} */
32 /* This may be dubious for normal UCB1 but is essential for
33 * reading stability of RAVE, it appears. */
34 foreach_point_and_pass(map->b) {
35 if (!map->consider[c])
36 continue;
37 add_prior_value(map, c, 0.5, u->prior->even_eqex);
38 } foreach_point_end;
41 void
42 uct_prior_eye(struct uct *u, struct tree_node *node, struct prior_map *map)
44 /* Discourage playing into our own eyes. However, we cannot
45 * completely prohibit it:
46 * #######
47 * ...XX.#
48 * XOOOXX#
49 * X.OOOO#
50 * .XXXX.# */
51 foreach_point(map->b) {
52 if (!map->consider[c])
53 continue;
54 if (!board_is_one_point_eye(map->b, &c, map->to_play))
55 continue;
56 add_prior_value(map, c, 0, u->prior->eye_eqex);
57 } foreach_point_end;
60 void
61 uct_prior_ko(struct uct *u, struct tree_node *node, struct prior_map *map)
63 /* Favor fighting ko, if we took it le 10 moves ago. */
64 coord_t ko = map->b->last_ko.coord;
65 if (is_pass(ko) || map->b->moves - map->b->last_ko_age > 10 || !map->consider[ko])
66 return;
67 // fprintf(stderr, "prior ko-fight @ %s %s\n", stone2str(map->to_play), coord2sstr(ko, map->b));
68 add_prior_value(map, ko, 1, u->prior->ko_eqex);
71 void
72 uct_prior_b19(struct uct *u, struct tree_node *node, struct prior_map *map)
74 /* Q_{b19} */
75 /* Specific hints for 19x19 board - priors for certain edge distances. */
76 foreach_point(map->b) {
77 if (!map->consider[c])
78 continue;
79 int d = coord_edge_distance(c, map->b);
80 if (d != 1 && d != 3)
81 continue;
82 /* The bonus applies only with no stones in immediate
83 * vincinity. */
84 if (board_stone_radar(map->b, c, 2))
85 continue;
86 /* First line: 0 */
87 /* Third line: 1 */
88 add_prior_value(map, c, d == 3, u->prior->b19_eqex);
89 } foreach_point_end;
92 void
93 uct_prior_grandparent(struct uct *u, struct tree_node *node, struct prior_map *map)
95 /* Q_{grandparent} */
96 foreach_point_and_pass(map->b) {
97 if (!map->consider[c])
98 continue;
99 if (!node->parent || !node->parent->parent)
100 continue;
101 struct tree_node *gpp = node->parent->parent;
102 for (struct tree_node *ni = gpp->children; ni; ni = ni->sibling) {
103 /* Be careful not to emphasize too random results. */
104 if (ni->coord == node->coord && ni->u.playouts > u->prior->gp_eqex) {
105 /* We purposefuly ignore the parity. */
106 stats_add_result(&map->prior[c], ni->u.value, u->prior->gp_eqex);
109 } foreach_point_end;
112 void
113 uct_prior_playout(struct uct *u, struct tree_node *node, struct prior_map *map)
115 /* Q_{playout-policy} */
116 if (u->playout->assess)
117 u->playout->assess(u->playout, map, u->prior->policy_eqex);
120 void
121 uct_prior_cfgd(struct uct *u, struct tree_node *node, struct prior_map *map)
123 /* Q_{common_fate_graph_distance} */
124 /* Give bonus to moves local to the last move, where "local" means
125 * local in terms of groups, not just manhattan distance. */
126 if (is_pass(map->b->last_move.coord))
127 return;
129 int distances[board_size2(map->b)];
130 cfg_distances(map->b, map->b->last_move.coord, distances, 3);
131 foreach_point(map->b) {
132 if (!map->consider[c])
133 continue;
134 // fprintf(stderr, "distance %s-%s: %d\n", coord2sstr(map->b->last_move.coord, map->b), coord2sstr(c, map->b), distances[c]);
135 if (distances[c] > 3)
136 continue;
137 assert(distances[c] != 0);
138 int bonuses[] = { 0, u->prior->cfgd_eqex, u->prior->cfgd_eqex / 2, u->prior->cfgd_eqex / 2 };
139 int bonus = bonuses[distances[c]];
140 add_prior_value(map, c, 1, bonus);
141 } foreach_point_end;
144 void
145 uct_prior(struct uct *u, struct tree_node *node, struct prior_map *map)
147 if (u->prior->even_eqex)
148 uct_prior_even(u, node, map);
149 if (u->prior->eye_eqex)
150 uct_prior_eye(u, node, map);
151 if (u->prior->ko_eqex)
152 uct_prior_ko(u, node, map);
153 if (u->prior->b19_eqex)
154 uct_prior_b19(u, node, map);
155 if (u->prior->gp_eqex)
156 uct_prior_grandparent(u, node, map);
157 if (u->prior->policy_eqex)
158 uct_prior_playout(u, node, map);
159 if (u->prior->cfgd_eqex)
160 uct_prior_cfgd(u, node, map);
163 struct uct_prior *
164 uct_prior_init(char *arg)
166 struct uct_prior *p = calloc(1, sizeof(struct uct_prior));
168 // gp: 14 vs 0: 44% (+-3.5)
169 p->gp_eqex = p->ko_eqex = 0;
170 p->even_eqex = p->policy_eqex = p->b19_eqex = p->cfgd_eqex = p->eye_eqex = -1;
171 p->eqex = 6; /* Even number! */
173 if (arg) {
174 char *optspec, *next = arg;
175 while (*next) {
176 optspec = next;
177 next += strcspn(next, ":");
178 if (*next) { *next++ = 0; } else { *next = 0; }
180 char *optname = optspec;
181 char *optval = strchr(optspec, '=');
182 if (optval) *optval++ = 0;
184 if (!strcasecmp(optname, "eqex") && optval) {
185 p->eqex = atoi(optval);
187 } else if (!strcasecmp(optname, "even") && optval) {
188 p->even_eqex = atoi(optval);
189 } else if (!strcasecmp(optname, "gp") && optval) {
190 p->gp_eqex = atoi(optval);
191 } else if (!strcasecmp(optname, "policy") && optval) {
192 p->policy_eqex = atoi(optval);
193 } else if (!strcasecmp(optname, "b19") && optval) {
194 p->b19_eqex = atoi(optval);
195 } else if (!strcasecmp(optname, "cfgd") && optval) {
196 p->cfgd_eqex = atoi(optval);
197 } else if (!strcasecmp(optname, "eye") && optval) {
198 p->eye_eqex = atoi(optval);
199 } else if (!strcasecmp(optname, "ko") && optval) {
200 p->ko_eqex = atoi(optval);
201 } else {
202 fprintf(stderr, "uct: Invalid prior argument %s or missing value\n", optname);
203 exit(1);
208 if (p->even_eqex < 0) p->even_eqex = p->eqex;
209 if (p->gp_eqex < 0) p->gp_eqex = p->eqex;
210 if (p->policy_eqex < 0) p->policy_eqex = p->eqex;
211 if (p->b19_eqex < 0) p->b19_eqex = p->eqex;
212 if (p->cfgd_eqex < 0) p->cfgd_eqex = p->eqex;
214 return p;