12 #include "uct/internal.h"
13 #include "uct/prior.h"
16 /* Applying heuristic values to the tree nodes, skewing the reading in
17 * most interesting directions. */
21 /* Equivalent experience for prior knowledge. MoGo paper recommends
22 * 50 playouts per source; in practice, esp. with RAVE, about 6
23 * playouts per source seems best. */
/* Per-heuristic equivalent-experience values. In this file each one
 * gates its heuristic in uct_prior() (a zero value skips it) and is
 * handed on, directly or scaled, as the last argument of the call that
 * records the prior; uct_prior_init() fills them in from its option
 * string.
 * NOTE(review): the rest of the file reaches these names through a
 * struct pointer (u->prior->..., p->...), but the enclosing declaration
 * is not visible in this listing -- confirm where they actually live. */
25 int even_eqex
, gp_eqex
, policy_eqex
, b19_eqex
, cfgd_eqex
, eye_eqex
, ko_eqex
;
/* "Even" prior: walk every point and pass of map->b, test
 * map->consider[c], and record a neutral value of 0.5 through
 * add_prior_value() with u->prior->even_eqex as its last argument.
 *
 * u    - engine state; only u->prior->even_eqex is read here.
 * node - not referenced in the lines visible here.
 * map  - prior map being filled; supplies the board (map->b) and the
 *        consider[] mask.
 *
 * NOTE(review): this listing omits some source lines (return type,
 * braces, and the statement guarded by the consider[] test).
 * Presumably unconsidered points are skipped -- confirm against the
 * full file. */
29 uct_prior_even(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
32 /* This may be dubious for normal UCB1 but is essential for
33 * reading stability of RAVE, it appears. */
34 foreach_point_and_pass(map
->b
) {
35 if (!map
->consider
[c
])
/* (guarded statement not visible in this listing) */
37 add_prior_value(map
, c
, 0.5, u
->prior
->even_eqex
);
/* "Eye" prior: walk every point of map->b, test map->consider[c] and
 * board_is_one_point_eye(map->b, &c, map->to_play), and record a value
 * of 0 through add_prior_value() with u->prior->eye_eqex as its last
 * argument, so that filling our own eyes is discouraged rather than
 * forbidden.
 *
 * u    - engine state; only u->prior->eye_eqex is read here.
 * node - not referenced in the lines visible here.
 * map  - prior map being filled; supplies board, side to play and the
 *        consider[] mask.
 *
 * NOTE(review): the statements guarded by the two tests, and the rest
 * of the explanatory comment inside the body, are missing from this
 * listing. Presumably both tests skip the point, so only considered
 * one-point eyes receive the prior -- confirm against the full file. */
42 uct_prior_eye(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
44 /* Discourage playing into our own eyes. However, we cannot
45 * completely prohibit it:
51 foreach_point(map
->b
) {
52 if (!map
->consider
[c
])
54 if (!board_is_one_point_eye(map
->b
, &c
, map
->to_play
))
56 add_prior_value(map
, c
, 0, u
->prior
->eye_eqex
);
/* "Ko" prior: take the coordinate of the board's last ko
 * (map->b->last_ko.coord) and record a value of 1 for it through
 * add_prior_value() with u->prior->ko_eqex as its last argument.
 *
 * The guard in front of that call tests three things: the ko coordinate
 * is a pass, more than 10 moves separate map->b->moves from
 * map->b->last_ko_age, or the coordinate is not in map->consider[].
 *
 * u    - engine state; only u->prior->ko_eqex is read here.
 * node - not referenced in the lines visible here.
 * map  - prior map being filled.
 *
 * NOTE(review): the statement guarded by that test is missing from
 * this listing. Presumably it leaves the function, so the bonus applies
 * only to a recent, considered ko -- confirm against the full file. */
61 uct_prior_ko(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
63 /* Favor fighting ko, if we took it at most 10 moves ago. */
64 coord_t ko
= map
->b
->last_ko
.coord
;
65 if (is_pass(ko
) || map
->b
->moves
- map
->b
->last_ko_age
> 10 || !map
->consider
[ko
])
/* (guarded statement not visible in this listing) */
67 // fprintf(stderr, "prior ko-fight @ %s %s\n", stone2str(map->to_play), coord2sstr(ko, map->b));
68 add_prior_value(map
, ko
, 1, u
->prior
->ko_eqex
);
/* "b19" prior: edge-distance hints meant for 19x19 boards. Walk every
 * point of map->b, test map->consider[c], compute
 * d = coord_edge_distance(c, map->b), test
 * board_stone_radar(map->b, c, 2), and record the value (d == 3)
 * through add_prior_value() with u->prior->b19_eqex as its last
 * argument. The recorded value is therefore 1 when the edge distance is
 * exactly 3 and 0 otherwise.
 *
 * u    - engine state; only u->prior->b19_eqex is read here.
 * node - not referenced in the lines visible here.
 * map  - prior map being filled.
 *
 * NOTE(review): the statements guarded by the consider[] and radar
 * tests are missing from this listing, and the numbering gaps suggest
 * other filtering on d may be missing as well. Presumably points with
 * stones nearby are skipped -- confirm against the full file. */
72 uct_prior_b19(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
75 /* Specific hints for 19x19 board - priors for certain edge distances. */
76 foreach_point(map
->b
) {
77 if (!map
->consider
[c
])
79 int d
= coord_edge_distance(c
, map
->b
);
82 /* The bonus applies only with no stones in immediate
84 if (board_stone_radar(map
->b
, c
, 2))
88 add_prior_value(map
, c
, d
== 3, u
->prior
->b19_eqex
);
/* "Grandparent" prior: reuse statistics already gathered two levels up
 * the tree. For every point and pass of map->b (subject to the
 * map->consider[c] test) the children of node->parent->parent are
 * scanned through their sibling links. A child whose coord equals
 * node->coord and whose u.playouts exceeds u->prior->gp_eqex has its
 * u.value added to map->prior[c] via stats_add_result(), with gp_eqex
 * as the last argument.
 *
 * u    - engine state; only u->prior->gp_eqex is read here.
 * node - tree node whose parent and grandparent links are followed.
 * map  - prior map being filled.
 *
 * NOTE(review): the match is made against node->coord, not against the
 * loop coordinate c, so the same grandparent-child statistics are added
 * for every considered c -- confirm this is intended.
 * NOTE(review): the parent/grandparent test does not depend on c and
 * could be done once before the loop. The statements guarded by it and
 * by the consider[] test are missing from this listing; presumably both
 * skip the iteration. */
93 uct_prior_grandparent(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
96 foreach_point_and_pass(map
->b
) {
97 if (!map
->consider
[c
])
99 if (!node
->parent
|| !node
->parent
->parent
)
101 struct tree_node
*gpp
= node
->parent
->parent
;
102 for (struct tree_node
*ni
= gpp
->children
; ni
; ni
= ni
->sibling
) {
103 /* Be careful not to emphasize too random results. */
104 if (ni
->coord
== node
->coord
&& ni
->u
.playouts
> u
->prior
->gp_eqex
) {
105 /* We purposefully ignore the parity. */
106 stats_add_result(&map
->prior
[c
], ni
->u
.value
, u
->prior
->gp_eqex
);
/* "Playout policy" prior: when the playout policy provides an assess
 * hook (u->playout->assess is non-NULL), call it with the policy
 * itself, the prior map and u->prior->policy_eqex. Whatever values get
 * recorded is up to that hook; nothing else happens in the lines
 * visible here.
 *
 * u    - engine state; reads u->playout and u->prior->policy_eqex.
 * node - not referenced in the lines visible here.
 * map  - prior map handed to the hook. */
113 uct_prior_playout(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
115 /* Q_{playout-policy} */
116 if (u
->playout
->assess
)
117 u
->playout
->assess(u
->playout
, map
, u
->prior
->policy_eqex
);
/* "Common fate graph distance" prior: favour moves close to the last
 * move. cfg_distances() fills a per-point distance array starting from
 * map->b->last_move.coord (called with limit argument 3). For each
 * point a value of 1 is then recorded through add_prior_value() with a
 * bonus looked up by distance: u->prior->cfgd_eqex at distance 1 and
 * cfgd_eqex / 2 (integer division) at distances 2 and 3.
 *
 * u    - engine state; only u->prior->cfgd_eqex is read here.
 * node - not referenced in the lines visible here.
 * map  - prior map being filled; supplies board and consider[] mask.
 *
 * NOTE(review): the statements guarded by the pass test, the consider[]
 * test and the "> 3" test are missing from this listing. Presumably the
 * first leaves the function and the other two skip the point -- confirm
 * against the full file.
 * NOTE(review): distances[] is a variable-length array of
 * board_size2(map->b) ints on the stack. A negative distances[c] would
 * get past the "> 3" test and index bonuses[] out of range -- confirm
 * cfg_distances() never produces one. */
121 uct_prior_cfgd(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
123 /* Q_{common_fate_graph_distance} */
124 /* Give bonus to moves local to the last move, where "local" means
125 * local in terms of groups, not just manhattan distance. */
126 if (is_pass(map
->b
->last_move
.coord
))
129 int distances
[board_size2(map
->b
)];
130 cfg_distances(map
->b
, map
->b
->last_move
.coord
, distances
, 3);
131 foreach_point(map
->b
) {
132 if (!map
->consider
[c
])
134 // fprintf(stderr, "distance %s-%s: %d\n", coord2sstr(map->b->last_move.coord, map->b), coord2sstr(c, map->b), distances[c]);
135 if (distances
[c
] > 3)
137 assert(distances
[c
] != 0);
/* bonuses[] is indexed by distance: slot 0 is excluded by the assert
 * above, slot 1 is the full cfgd_eqex, slots 2 and 3 are half of it. */
138 int bonuses
[] = { 0, u
->prior
->cfgd_eqex
, u
->prior
->cfgd_eqex
/ 2, u
->prior
->cfgd_eqex
/ 2 };
139 int bonus
= bonuses
[distances
[c
]];
140 add_prior_value(map
, c
, 1, bonus
);
/* Entry point for prior computation: run every prior heuristic whose
 * value in u->prior is non-zero, always in the same order -- even, eye,
 * ko, b19, grandparent, playout policy, cfgd. Each heuristic receives
 * the same (u, node, map) triple and adds its contribution to map.
 *
 * u    - engine state holding the per-heuristic values in u->prior.
 * node - tree node the priors are computed for; passed through.
 * map  - prior map the heuristics fill in; passed through. */
145 uct_prior(struct uct
*u
, struct tree_node
*node
, struct prior_map
*map
)
147 if (u
->prior
->even_eqex
)
148 uct_prior_even(u
, node
, map
);
149 if (u
->prior
->eye_eqex
)
150 uct_prior_eye(u
, node
, map
);
151 if (u
->prior
->ko_eqex
)
152 uct_prior_ko(u
, node
, map
);
153 if (u
->prior
->b19_eqex
)
154 uct_prior_b19(u
, node
, map
);
155 if (u
->prior
->gp_eqex
)
156 uct_prior_grandparent(u
, node
, map
);
157 if (u
->prior
->policy_eqex
)
158 uct_prior_playout(u
, node
, map
);
159 if (u
->prior
->cfgd_eqex
)
160 uct_prior_cfgd(u
, node
, map
);
/* Build the prior configuration from an option string.
 *
 * A zero-filled struct uct_prior is allocated with calloc() and given
 * defaults: gp and ko are 0, eqex is 6, and even, policy, b19, cfgd and
 * eye are -1. After parsing, every per-heuristic field that is still
 * negative is replaced by eqex.
 *
 * arg is cut into ':'-separated items, each split at its first '=' into
 * a name and a value. Recognised names, compared case-insensitively,
 * are eqex, even, gp, policy, b19, cfgd, eye and ko; each requires a
 * value, which is converted with atoi(). Anything else reaches the
 * "Invalid prior argument" message on stderr.
 *
 * arg - option string; it is modified in place (separators are
 *       overwritten with NUL bytes).
 *
 * NOTE(review): the loop header, the assignment of optspec, whatever
 * follows the error message, and the end of the function (including
 * its return) are missing from this listing -- confirm against the
 * full file.
 * NOTE(review): in the visible lines the calloc() result is used
 * without a NULL check, and atoi() cannot report malformed numbers. */
164 uct_prior_init(char *arg
)
166 struct uct_prior
*p
= calloc(1, sizeof(struct uct_prior
));
168 // gp: 14 vs 0: 44% (+-3.5)
/* gp and ko start at 0, which uct_prior() treats as "skip". */
169 p
->gp_eqex
= p
->ko_eqex
= 0;
/* -1 marks "not set by the user"; replaced by eqex further down. */
170 p
->even_eqex
= p
->policy_eqex
= p
->b19_eqex
= p
->cfgd_eqex
= p
->eye_eqex
= -1;
171 p
->eqex
= 6; /* Even number! */
174 char *optspec
, *next
= arg
;
/* Advance to the next ':' (or the end) and terminate the current item. */
177 next
+= strcspn(next
, ":");
178 if (*next
) { *next
++ = 0; } else { *next
= 0; }
/* Split "name=value"; optval stays NULL when there is no '='. */
180 char *optname
= optspec
;
181 char *optval
= strchr(optspec
, '=');
182 if (optval
) *optval
++ = 0;
184 if (!strcasecmp(optname
, "eqex") && optval
) {
185 p
->eqex
= atoi(optval
);
187 } else if (!strcasecmp(optname
, "even") && optval
) {
188 p
->even_eqex
= atoi(optval
);
189 } else if (!strcasecmp(optname
, "gp") && optval
) {
190 p
->gp_eqex
= atoi(optval
);
191 } else if (!strcasecmp(optname
, "policy") && optval
) {
192 p
->policy_eqex
= atoi(optval
);
193 } else if (!strcasecmp(optname
, "b19") && optval
) {
194 p
->b19_eqex
= atoi(optval
);
195 } else if (!strcasecmp(optname
, "cfgd") && optval
) {
196 p
->cfgd_eqex
= atoi(optval
);
197 } else if (!strcasecmp(optname
, "eye") && optval
) {
198 p
->eye_eqex
= atoi(optval
);
199 } else if (!strcasecmp(optname
, "ko") && optval
) {
200 p
->ko_eqex
= atoi(optval
);
202 fprintf(stderr
, "uct: Invalid prior argument %s or missing value\n", optname
);
/* Any per-heuristic value still negative falls back to eqex. For gp
 * and ko this only triggers when the user supplied a negative number,
 * because their default is 0. */
208 if (p
->even_eqex
< 0) p
->even_eqex
= p
->eqex
;
209 if (p
->gp_eqex
< 0) p
->gp_eqex
= p
->eqex
;
210 if (p
->policy_eqex
< 0) p
->policy_eqex
= p
->eqex
;
211 if (p
->b19_eqex
< 0) p
->b19_eqex
= p
->eqex
;
212 if (p
->cfgd_eqex
< 0) p
->cfgd_eqex
= p
->eqex
;
213 if (p
->eye_eqex
< 0) p
->eye_eqex
= p
->eqex
;
214 if (p
->ko_eqex
< 0) p
->ko_eqex
= p
->eqex
;