uct_prior_one(): Fix grandparent heuristic
[pachi.git] / uct / prior.c
blobeb906f67724d7d2ebfa9d22a33fa2cb126f7a508
1 #include <assert.h>
2 #include <math.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
7 #include "board.h"
8 #include "debug.h"
9 #include "move.h"
10 #include "random.h"
11 #include "tactics.h"
12 #include "uct/internal.h"
13 #include "uct/prior.h"
14 #include "uct/tree.h"
16 /* Applying heuristic values to the tree nodes, skewing the reading in
17 * most interesting directions. */
20 void
21 uct_prior_one(struct uct *u, struct tree_node *node, struct prior_map *map, coord_t c)
23 /* Initialization of UCT values based on prior knowledge */
25 /* Q_{even} */
26 /* This may be dubious for normal UCB1 but is essential for
27 * reading stability of RAVE, it appears. */
28 if (u->even_eqex) {
29 map->prior[c].playouts += u->even_eqex;
30 map->prior[c].wins += u->even_eqex / 2;
33 /* Discourage playing into our own eyes. However, we cannot
34 * completely prohibit it:
35 * #######
36 * ...XX.#
37 * XOOOXX#
38 * X.OOOO#
39 * .XXXX.# */
40 if (board_is_one_point_eye(map->b, &c, map->to_play)) {
41 map->prior[c].playouts += u->eqex;
42 map->prior[c].wins += map->parity > 0 ? 0 : u->eqex;
45 /* Q_{b19} */
46 /* Specific hints for 19x19 board - priors for certain edge distances. */
47 if (u->b19_eqex) {
48 int d = coord_edge_distance(c, map->b);
49 if (d == 1 || d == 3) {
50 /* The bonus applies only with no stones in immediate
51 * vincinity. */
52 if (!board_stone_radar(map->b, c, 2)) {
53 /* First line: -eqex */
54 /* Third line: +eqex */
55 int v = d == 1 ? -1 : 1;
56 map->prior[c].playouts += u->b19_eqex;
57 map->prior[c].wins += map->parity * v > 0 ? u->b19_eqex : 0;
62 /* Q_{grandparent} */
63 if (u->gp_eqex && node->parent && node->parent->parent) {
64 struct tree_node *gpp = node->parent->parent;
65 for (struct tree_node *ni = gpp->children; ni; ni = ni->sibling) {
66 /* Be careful not to emphasize too random results. */
67 if (ni->coord == node->coord && ni->u.playouts > u->gp_eqex) {
68 map->prior[c].playouts += u->gp_eqex;
69 map->prior[c].wins += u->gp_eqex * ni->u.wins / ni->u.playouts;
74 /* Q_{playout-policy} */
75 if (u->policy_eqex) {
76 int assess = 0;
77 if (u->playout->assess) {
78 struct move m = { c, map->to_play };
79 assess = u->playout->assess(u->playout, map->b, &m, u->policy_eqex);
81 if (assess) {
82 map->prior[c].playouts += abs(assess);
83 /* Good moves for enemy are losses for us.
84 * We will properly maximize this in the UCB1
85 * decision. */
86 assess *= map->parity;
87 if (assess > 0) map->prior[c].wins += assess;
92 void
93 uct_prior(struct uct *u, struct tree_node *node, struct prior_map *map)
95 uct_prior_one(u, node, map, pass);
96 foreach_point(map->b) {
97 if (!map->consider[c])
98 continue;
99 uct_prior_one(u, node, map, c);
100 } foreach_point_end;