11 #include "uct/internal.h"
12 #include "uct/prior.h"
/* Apply heuristic prior values to the tree nodes, skewing the reading
 * toward the most interesting directions. */
20 uct_prior(struct uct
*u
, struct tree
*tree
, struct tree_node
*node
,
21 struct board
*b
, enum stone color
, int parity
)
23 /* Initialization of UCT values based on prior knowledge */
26 /* This may be dubious for normal UCB1 but is essential for
27 * reading stability of RAVE, it appears. */
29 node
->prior
.playouts
+= u
->even_eqex
;
30 node
->prior
.wins
+= u
->even_eqex
/ 2;
33 /* Discourage playing into our own eyes. However, we cannot
34 * completely prohibit it:
40 if (board_is_one_point_eye(b
, &node
->coord
, color
)) {
41 node
->prior
.playouts
+= u
->eqex
;
42 node
->prior
.wins
+= tree_parity(tree
, parity
) > 0 ? 0 : u
->eqex
;
46 if (u
->gp_eqex
&& node
->parent
&& node
->parent
->parent
&& node
->parent
->parent
->parent
) {
47 struct tree_node
*gpp
= node
->parent
->parent
->parent
;
48 for (struct tree_node
*ni
= gpp
->children
; ni
; ni
= ni
->sibling
) {
49 /* Be careful not to emphasize too random results. */
50 if (ni
->coord
== node
->coord
&& ni
->u
.playouts
> u
->gp_eqex
) {
51 node
->prior
.playouts
+= u
->gp_eqex
;
52 node
->prior
.wins
+= u
->gp_eqex
* ni
->u
.wins
/ ni
->u
.playouts
;
58 /* Q_{playout-policy} */
61 struct playout_policy
*playout
= u
->playout
;
62 if (playout
->assess
) {
63 struct move m
= { node
->coord
, color
};
64 assess
= playout
->assess(playout
, b
, &m
, u
->policy_eqex
);
67 node
->prior
.playouts
+= abs(assess
);
68 /* Good moves for enemy are losses for us.
69 * We will properly maximize this in the UCB1
71 assess
*= tree_parity(tree
, parity
);
72 if (assess
> 0) node
->prior
.wins
+= assess
;
77 if (node
->prior
.playouts
) {
78 tree_update_node_value(node
, u
->amaf_prior
);
81 //fprintf(stderr, "%s,%s prior: %d/%d = %f (%f)\n", coord2sstr(node->parent->coord, b), coord2sstr(node->coord, b), node->prior.wins, node->prior.playouts, node->prior.value, assess);