12 #include "uct/internal.h"
13 #include "uct/prior.h"
16 /* Apply heuristic values to the tree nodes, skewing the reading toward
17 * the most interesting directions. */
21 /* Equivalent experience for prior knowledge. MoGo paper recommends
22 * 50 playouts per source; in practice, esp. with RAVE, about 6
23 * playouts per source seems best. */
/* Per-heuristic weights, read elsewhere in this file through
 * u->prior-> and p->. Each one is handed to the matching uct_prior_*()
 * routine as the weight of the prior it adds, and uct_prior() only
 * runs a heuristic whose weight is non-zero. uct_prior_init() turns a
 * negative value -N into "eqex / N". */
25 int even_eqex
, policy_eqex
, b19_eqex
, eye_eqex
, ko_eqex
;
/* cfgdn is the threshold that uct_prior_cfgd() compares
 * map->distances[c] against; cfgd_eqex[] is indexed by that distance
 * and is allocated with cfgdn + 1 entries in uct_prior_init(). */
26 int cfgdn
; int *cfgd_eqex
;
/* Even prior: iterates with foreach_point_and_pass(map->b), tests
 * map->consider[c], and calls add_prior_value() with value 0.5
 * weighted by u->prior->even_eqex.
 * node is not referenced in the visible lines.
 * NOTE(review): the line between the consider[] test and the
 * add_prior_value() call is not visible in this excerpt, so how the
 * test gates the call cannot be confirmed from here. */
30 uct_prior_even(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
33 /* This may be dubious for normal UCB1 but is essential for
34 * reading stability of RAVE, it appears. */
35 foreach_point_and_pass(map
->b
) {
36 if (!map
->consider
[c
])
38 add_prior_value(map
, c
, 0.5, u
->prior
->even_eqex
);
/* Eye prior: iterates with foreach_point(map->b), tests
 * map->consider[c] and board_is_one_point_eye(map->b, &c, map->to_play),
 * and calls add_prior_value() with value 0 weighted by
 * u->prior->eye_eqex.
 * node is not referenced in the visible lines.
 * NOTE(review): the in-body comment below is cut off after
 * "completely prohibit it:", and the statements guarded by the two
 * tests are not visible in this excerpt -- confirm the exact gating
 * against the full file. */
43 uct_prior_eye(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
45 /* Discourage playing into our own eyes. However, we cannot
46 * completely prohibit it:
52 foreach_point(map
->b
) {
53 if (!map
->consider
[c
])
55 if (!board_is_one_point_eye(map
->b
, &c
, map
->to_play
))
57 add_prior_value(map
, c
, 0, u
->prior
->eye_eqex
);
// Ko prior: reads the last ko coordinate from map->b->last_ko.coord and
// calls add_prior_value() for that coordinate with value 1 weighted by
// u->prior->ko_eqex. The visible test combines is_pass(ko), the
// difference map->b->moves - map->b->last_ko_age being greater than 10,
// and map->consider[ko] being unset.
// node is not referenced in the visible lines.
// NOTE(review): the statement guarded by that test is not visible in
// this excerpt -- presumably an early exit; confirm.
62 uct_prior_ko(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
64 /* Favor fighting ko, if we took it at most 10 moves ago. */
65 coord_t ko
= map
->b
->last_ko
.coord
;
66 if (is_pass(ko
) || map
->b
->moves
- map
->b
->last_ko_age
> 10 || !map
->consider
[ko
])
68 // fprintf(stderr, "prior ko-fight @ %s %s\n", stone2str(map->to_play), coord2sstr(ko, map->b));
69 add_prior_value(map
, ko
, 1, u
->prior
->ko_eqex
);
/* 19x19 prior: iterates with foreach_point(map->b), tests
 * map->consider[c], computes d = coord_edge_distance(c, map->b), tests
 * board_stone_radar(map->b, c, 2), and calls add_prior_value() with
 * value (d == 2) -- 1 when d equals 2, 0 otherwise -- weighted by
 * u->prior->b19_eqex.
 * node is not referenced in the visible lines.
 * NOTE(review): the in-body comment starting "The bonus applies only"
 * is cut off, and the statements guarded by the two tests are not
 * visible in this excerpt; confirm the gating against the full file. */
73 uct_prior_b19(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
76 /* Specific hints for 19x19 board - priors for certain edge distances. */
77 foreach_point(map
->b
) {
78 if (!map
->consider
[c
])
/* d: result of coord_edge_distance() for the current point. */
80 int d
= coord_edge_distance(c
, map
->b
);
83 /* The bonus applies only with no stones in immediate
85 if (board_stone_radar(map
->b
, c
, 2))
89 add_prior_value(map
, c
, d
== 2, u
->prior
->b19_eqex
);
// Playout-policy prior: when the playout policy provides an assess
// callback (u->playout->assess is non-NULL), calls it with the policy
// itself, the prior map, and u->prior->policy_eqex as the weight.
// Nothing is called when the callback is NULL.
// node is not referenced in the visible lines.
94 uct_prior_playout(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
96 /* Q_{playout-policy} */
97 if (u
->playout
->assess
)
98 u
->playout
->assess(u
->playout
, map
, u
->prior
->policy_eqex
);
/* Common-fate-graph distance prior: for each point visited by
 * foreach_point(map->b), looks up map->distances[c], uses it as an
 * index into u->prior->cfgd_eqex[] to obtain the weight, and calls
 * add_prior_value() with value 1 and that weight.
 * The visible tests are: the last move being a pass or a resign,
 * map->consider[c] being unset, and map->distances[c] exceeding
 * u->prior->cfgdn.
 * node is not referenced in the visible lines.
 * NOTE(review): the statements guarded by those three tests are not
 * visible in this excerpt (presumably return/continue); the index into
 * cfgd_eqex[] is only in range if distances above cfgdn are skipped --
 * confirm against the full file. */
102 uct_prior_cfgd(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
104 /* Q_{common_fate_graph_distance} */
105 /* Give bonus to moves local to the last move, where "local" means
106 * local in terms of groups, not just manhattan distance. */
107 if (is_pass(map
->b
->last_move
.coord
) || is_resign(map
->b
->last_move
.coord
))
110 foreach_point(map
->b
) {
111 if (!map
->consider
[c
])
113 if (map
->distances
[c
] > u
->prior
->cfgdn
)
/* A distance of 0 is not expected for a point that reaches this line. */
115 assert(map
->distances
[c
] != 0);
/* The weight comes from the per-distance table. */
116 int bonus
= u
->prior
->cfgd_eqex
[map
->distances
[c
]];
117 add_prior_value(map
, c
, 1, bonus
);
/* Entry point that applies the enabled prior heuristics to map.
 * Each heuristic runs only when its field in u->prior is non-zero
 * (non-NULL for cfgd_eqex), in this order: even, eye, ko, b19,
 * playout policy, cfgd. u, node and map are passed through unchanged
 * to every heuristic. */
122 uct_prior(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
124 if (u
->prior
->even_eqex
)
125 uct_prior_even(u
, node
, map
);
126 if (u
->prior
->eye_eqex
)
127 uct_prior_eye(u
, node
, map
);
128 if (u
->prior
->ko_eqex
)
129 uct_prior_ko(u
, node
, map
);
130 if (u
->prior
->b19_eqex
)
131 uct_prior_b19(u
, node
, map
);
132 if (u
->prior
->policy_eqex
)
133 uct_prior_playout(u
, node
, map
);
/* cfgd is switched on by the presence of its bonus table. */
134 if (u
->prior
->cfgd_eqex
)
135 uct_prior_cfgd(u
, node
, map
);
/* Builds the prior configuration from an option string.
 *
 * arg: options separated by ':' in name=value form; the names handled
 *      below are eqex, even, policy, b19, cfgd, eye and ko. The string
 *      is modified in place: separators are overwritten with NUL while
 *      parsing.
 * b:   board; only board_size(b) is read here, to pick the default eqex.
 *
 * Allocates a zeroed struct uct_prior with calloc().
 * NOTE(review): the allocation result is not checked before it is
 * written through in the visible lines.
 * NOTE(review): the return statement and several control-flow lines
 * (loop header, closing braces, else branches) are not visible in this
 * excerpt, so the notes below describe only what can be seen. */
139 uct_prior_init(char *arg
, struct board
*b
)
141 struct uct_prior
*p
= calloc(1, sizeof(struct uct_prior
));
/* Default these four weights to -1; negative values are resolved
 * relative to eqex once parsing is done. */
144 p
->even_eqex
= p
->policy_eqex
= p
->b19_eqex
= p
->eye_eqex
= -1;
/* Base equivalent experience: 20 when board_size(b)-2 >= 19, else 14. */
148 p
->eqex
= board_size(b
)-2 >= 19 ? 20 : 14;
151 char *optspec
, *next
= arg
;
/* Cut the current option off at the next ':' (overwritten with NUL)
 * and step next past it; at the end of the string next stays on the
 * terminator.
 * NOTE(review): the enclosing loop and the assignment of optspec are
 * not visible in this excerpt. */
154 next
+= strcspn(next
, ":");
155 if (*next
) { *next
++ = 0; } else { *next
= 0; }
/* Split the option into name and value at the first '='; optval stays
 * NULL when there is no '='. */
157 char *optname
= optspec
;
158 char *optval
= strchr(optspec
, '=');
159 if (optval
) *optval
++ = 0;
/* Names are matched case-insensitively and every option needs a
 * value. Values are converted with atoi(), which gives no indication
 * of malformed numbers. */
161 if (!strcasecmp(optname
, "eqex") && optval
) {
162 p
->eqex
= atoi(optval
);
164 } else if (!strcasecmp(optname
, "even") && optval
) {
165 p
->even_eqex
= atoi(optval
);
166 } else if (!strcasecmp(optname
, "policy") && optval
) {
167 p
->policy_eqex
= atoi(optval
);
168 } else if (!strcasecmp(optname
, "b19") && optval
) {
169 p
->b19_eqex
= atoi(optval
);
170 } else if (!strcasecmp(optname
, "cfgd") && optval
) {
171 /* cfgd=3%40%20%20 - 3 levels; immediate libs
172 * of last move => 40 wins, their neighbors
173 * 20 wins, 2nd-level neighbors 20 wins;
174 * neighbors are group-transitive. */
/* NOTE(review): the example above separates the values with '%', but
 * the visible code advances optval with strcspn(optval, ":"). The
 * option was already cut at ':' further up, so that scan appears to
 * run to the end of the option -- confirm which separator is intended. */
175 p
->cfgdn
= atoi(optval
); optval
+= strcspn(optval
, ":");
/* One entry per distance 0..cfgdn, zero-initialised.
 * NOTE(review): cfgdn comes straight from atoi() and is not
 * range-checked before sizing this allocation, whose result is also
 * unchecked. */
176 p
->cfgd_eqex
= calloc(p
->cfgdn
+ 1, sizeof(*p
->cfgd_eqex
));
/* NOTE(review): no visible bound keeps i within the cfgdn + 1 entries
 * allocated above; part of the loop body is missing from this excerpt
 * -- confirm. */
178 for (int i
= 1; *optval
; i
++, optval
+= strcspn(optval
, ":")) {
180 p
->cfgd_eqex
[i
] = atoi(optval
);
182 } else if (!strcasecmp(optname
, "eye") && optval
) {
183 p
->eye_eqex
= atoi(optval
);
184 } else if (!strcasecmp(optname
, "ko") && optval
) {
185 p
->ko_eqex
= atoi(optval
);
/* NOTE(review): presumably the fallback branch for an unknown name or
 * a missing value; the enclosing else and whatever follows the message
 * are not visible here. */
187 fprintf(stderr
, "uct: Invalid prior argument %s or missing value\n", optname
);
/* A negative weight -N means "eqex / N", so the -1 defaults resolve
 * to eqex itself. ko_eqex is resolved the same way, although no
 * default assignment for it is visible above. */
193 if (p
->even_eqex
< 0) p
->even_eqex
= p
->eqex
/ -p
->even_eqex
;
194 if (p
->policy_eqex
< 0) p
->policy_eqex
= p
->eqex
/ -p
->policy_eqex
;
195 if (p
->b19_eqex
< 0) p
->b19_eqex
= p
->eqex
/ -p
->b19_eqex
;
196 if (p
->eye_eqex
< 0) p
->eye_eqex
= p
->eqex
/ -p
->eye_eqex
;
197 if (p
->ko_eqex
< 0) p
->ko_eqex
= p
->eqex
/ -p
->ko_eqex
;
/* Default bonus table indexed by distance: 0, eqex, eqex/2, eqex/2.
 * NOTE(review): the condition guarding this default and any assignment
 * to cfgdn are not visible here. memcpy() copies all four entries, so
 * the allocation is only large enough when cfgdn >= 3 -- confirm. */
200 int bonuses
[] = { 0, p
->eqex
, p
->eqex
/ 2, p
->eqex
/ 2 };
202 p
->cfgd_eqex
= calloc(p
->cfgdn
+ 1, sizeof(*p
->cfgd_eqex
));
203 memcpy(p
->cfgd_eqex
, bonuses
, sizeof(bonuses
));
/* Distances above TREE_NODE_D_MAX are reported as unavailable.
 * NOTE(review): what happens after the message is not visible in this
 * excerpt. */
205 if (p
->cfgdn
> TREE_NODE_D_MAX
) {
206 fprintf(stderr
, "uct: CFG distances only up to %d available\n", TREE_NODE_D_MAX
);
/* The only visible statement asserts that p->cfgd_eqex is non-NULL.
 * NOTE(review): presumably this releases what uct_prior_init()
 * allocated, but the rest of the body is not visible in this excerpt
 * -- confirm against the full file. */
214 uct_prior_done(struct uct_prior
*p
)
216 assert(p
->cfgd_eqex
);